2016-11-23 06:04:50 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2015-07-04 04:07:57 +08:00
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
|
2016-11-23 06:04:50 +08:00
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VLDQ
|
2015-07-04 04:07:57 +08:00
|
|
|
;
|
|
|
|
; 32-bit smoke tests: the i686 invocations below are not piped into FileCheck, so they only verify that llc completes without crashing.
|
|
|
|
; RUN: llc < %s -mtriple=i686-unknown-unknown
|
|
|
|
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
|
|
|
|
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
|
2015-05-02 19:18:47 +08:00
|
|
|
|
|
|
|
;
|
|
|
|
; Double to Signed Integer
|
|
|
|
;
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion (fptosi) of <2 x double> to <2 x i64>.
; Expected codegen recorded by the assertions below:
;  - SSE, VEX, AVX512F and AVX512VL prefixes: each lane is converted with a
;    scalar (v)cvttsd2si and the two results are recombined with (v)punpcklqdq.
;  - AVX512DQ prefix: the input is treated as zmm0 and converted with one
;    vcvttpd2qq on zmm registers, followed by vzeroupper.
;  - AVX512VLDQ prefix: a single xmm vcvttpd2qq.
define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptosi_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm1
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptosi_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; VEX-NEXT: vmovq %rax, %xmm1
|
|
|
|
; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; VEX-NEXT: vmovq %rax, %xmm0
|
|
|
|
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; VEX-NEXT: retq
|
2016-08-23 22:37:35 +08:00
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptosi_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptosi_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-24 22:46:55 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
; IR under test: one vector fptosi, returned unchanged.
%cvt = fptosi <2 x double> %a to <2 x i64>
|
|
|
|
ret <2 x i64> %cvt
|
|
|
|
}
|
|
|
|
|
2016-08-23 23:00:52 +08:00
|
|
|
; Signed conversion (fptosi) of <2 x double> to <2 x i32>, widened to <4 x i32>
; with zeros in the upper two lanes.
; Both the SSE and AVX prefixes expect the whole function to be a single
; (v)cvttpd2dq followed by retq.
define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
|
|
|
|
; SSE-LABEL: fptosi_2f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-10-18 15:42:15 +08:00
|
|
|
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-08-23 23:00:52 +08:00
|
|
|
; AVX-LABEL: fptosi_2f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2016-10-18 15:42:15 +08:00
|
|
|
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
2015-05-02 19:18:47 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
%cvt = fptosi <2 x double> %a to <2 x i32>
|
2016-08-23 23:10:39 +08:00
|
|
|
; Mask indices 2 and 3 select from the zeroinitializer operand, so lanes 2-3
; of %ext are zero and lanes 0-1 are the converted values.
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
2015-05-02 19:18:47 +08:00
|
|
|
ret <4 x i32> %ext
|
|
|
|
}
|
|
|
|
|
2016-08-23 23:00:52 +08:00
|
|
|
; Signed conversion (fptosi) of <2 x double> to <2 x i32>, returned as <2 x i32>.
; Expected codegen: (v)cvttpd2dq followed by a shuffle (pshufd for the SSE
; prefix, vpmovzxdq for the AVX prefix) that leaves the two converted values in
; dword lanes 0 and 2 of xmm0.
; NOTE(review): presumably this reflects how a <2 x i32> return value is laid
; out in the register at this revision - not established by this file alone.
define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
|
|
|
|
; SSE-LABEL: fptosi_2f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-10-18 15:42:15 +08:00
|
|
|
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
|
|
|
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
2016-08-23 23:00:52 +08:00
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: fptosi_2f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2016-10-18 15:42:15 +08:00
|
|
|
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
2016-08-23 23:00:52 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
; IR under test: one vector fptosi, returned unchanged.
%cvt = fptosi <2 x double> %a to <2 x i32>
|
|
|
|
ret <2 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-19 01:05:39 +08:00
|
|
|
; Takes <2 x double>, widens it to <4 x double> with undef upper lanes, and
; converts the widened vector with fptosi to <4 x i32>.
; NOTE(review): the function name reads "4f64_to_2i32" while the signature is
; <2 x double> -> <4 x i32>; the name presumably refers to the widened
; intermediate and the two defined result lanes - confirm before renaming.
; Expected codegen: the SSE prefix converts xmm0 twice with cvttpd2dq and joins
; the results with unpcklpd; the AVX prefix treats the input as ymm0 and uses a
; single vcvttpd2dq on ymm0, followed by vzeroupper.
define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
|
|
|
|
; SSE-LABEL: fptosi_4f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-10-18 15:42:15 +08:00
|
|
|
; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
|
|
|
|
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
2017-09-18 11:29:54 +08:00
|
|
|
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX-LABEL: fptosi_4f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
2015-07-19 01:05:39 +08:00
|
|
|
; Lanes 0-1 of %ext come from %a; lanes 2-3 are undef.
%ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
|
|
%cvt = fptosi <4 x double> %ext to <4 x i32>
|
|
|
|
ret <4 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion (fptosi) of <4 x double> to <4 x i64>.
; Expected codegen recorded by the assertions below:
;  - SSE prefix: the input arrives in xmm0/xmm1; each of the four lanes is
;    converted with a scalar cvttsd2si and the pairs are rebuilt with punpcklqdq.
;  - AVX1, AVX2, AVX512F and AVX512VL prefixes: the upper half is extracted with
;    vextractf128, all four lanes go through scalar vcvttsd2si, and the halves
;    are rejoined with vinsertf128 (AVX1) or vinserti128 (the others).
;  - AVX512DQ prefix: the input is treated as zmm0 and converted with one
;    vcvttpd2qq on zmm registers.
;  - AVX512VLDQ prefix: a single ymm vcvttpd2qq.
define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptosi_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm2
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm3
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: movdqa %xmm3, %xmm1
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX1-LABEL: fptosi_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX1: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: fptosi_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX2: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptosi_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptosi_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2017-01-03 13:46:18 +08:00
|
|
|
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
|
|
|
|
; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
2017-01-03 13:46:18 +08:00
|
|
|
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
2016-11-23 22:01:18 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
; IR under test: one vector fptosi, returned unchanged.
%cvt = fptosi <4 x double> %a to <4 x i64>
|
|
|
|
ret <4 x i64> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion (fptosi) of <4 x double> to <4 x i32>.
; Expected codegen: the SSE prefix converts xmm1 and xmm0 separately with
; cvttpd2dq and joins the low halves with unpcklpd; the AVX prefix uses a single
; vcvttpd2dq from ymm0 to xmm0, followed by vzeroupper.
define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptosi_4f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-10-18 15:42:15 +08:00
|
|
|
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
|
|
|
|
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
|
2017-09-18 11:29:54 +08:00
|
|
|
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX-LABEL: fptosi_4f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0
|
|
|
|
; AVX-NEXT: vzeroupper
|
|
|
|
; AVX-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
; IR under test: one vector fptosi, returned unchanged.
%cvt = fptosi <4 x double> %a to <4 x i32>
|
|
|
|
ret <4 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
|
|
|
;
|
|
|
|
; Double to Unsigned Integer
|
|
|
|
;
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion (fptoui) of <2 x double> to <2 x i64>.
; Expected codegen recorded by the assertions below:
;  - SSE and VEX prefixes: per lane, the input and (input minus a constant
;    loaded from memory) are both converted with (v)cvttsd2si; the second result
;    is xor-ed with 0x8000000000000000 and cmovae picks between the two after a
;    (v)ucomisd against the same constant. The lanes are rejoined with
;    (v)punpcklqdq.
;  - AVX512F and AVX512VL prefixes: scalar vcvttsd2usi per lane, rejoined with
;    vpunpcklqdq.
;  - AVX512DQ prefix: the input is treated as zmm0 and converted with one
;    vcvttpd2uqq on zmm registers, followed by vzeroupper.
;  - AVX512VLDQ prefix: a single xmm vcvttpd2uqq.
define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptoui_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
|
|
|
; SSE-NEXT: movapd %xmm0, %xmm1
|
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rax
|
|
|
|
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rdx
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rax, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm1
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm3
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm3, %rax
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rcx
|
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rax, %rcx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rcx, %xmm0
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptoui_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm2, %rax
|
|
|
|
; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rdx
|
|
|
|
; VEX-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; VEX-NEXT: vmovq %rdx, %xmm2
|
|
|
|
; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rcx
|
|
|
|
; VEX-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; VEX-NEXT: retq
|
2016-08-23 22:37:35 +08:00
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptoui_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptoui_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-24 22:46:55 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
; IR under test: one vector fptoui, returned unchanged.
%cvt = fptoui <2 x double> %a to <2 x i64>
|
|
|
|
ret <2 x i64> %cvt
|
|
|
|
}
|
|
|
|
|
2016-11-06 15:50:25 +08:00
|
|
|
; Unsigned conversion (fptoui) of <2 x double> to <2 x i32>, widened to
; <4 x i32> with zeros in the upper two lanes.
; Expected codegen recorded by the assertions below:
;  - SSE and VEX prefixes: per lane, the input and (input minus a constant
;    loaded from memory) are both converted with (v)cvttsd2si; the second result
;    is xor-ed with 0x8000000000000000 and cmovae picks between the two after a
;    (v)ucomisd. The 64-bit results are then narrowed and zero-filled with
;    pxor + shufps (SSE) or vinsertps (VEX).
;  - AVX512F and AVX512DQ prefixes: the input is treated as zmm0, converted with
;    vcvttpd2udq zmm -> ymm, and vmovq zeroes the upper half of xmm0.
;  - AVX512VL and AVX512VLDQ prefixes: a single xmm vcvttpd2udq.
define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
|
|
|
|
; SSE-LABEL: fptoui_2f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
|
|
|
; SSE-NEXT: movapd %xmm0, %xmm1
|
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rax
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rdx
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: cmovaeq %rax, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm1
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm3
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm3, %rax
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rcx
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: cmovaeq %rax, %rcx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rcx, %xmm0
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; SSE-NEXT: pxor %xmm0, %xmm0
|
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
|
|
|
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptoui_2f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm2, %rax
|
|
|
|
; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rdx
|
|
|
|
; VEX-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; VEX-NEXT: vmovq %rdx, %xmm2
|
|
|
|
; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rcx
|
|
|
|
; VEX-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: retq
|
2016-11-06 15:50:25 +08:00
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptoui_2f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
|
2016-11-06 15:50:25 +08:00
|
|
|
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512F-NEXT: vzeroupper
|
2016-11-06 15:50:25 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptoui_2f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptoui_2f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
|
|
|
|
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: retq
|
2016-11-06 15:50:25 +08:00
|
|
|
|
|
|
%cvt = fptoui <2 x double> %a to <2 x i32>
|
|
|
|
; Mask indices 2 and 3 select from the zeroinitializer operand, so lanes 2-3
; of %ext are zero and lanes 0-1 are the converted values.
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
|
|
ret <4 x i32> %ext
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion (fptoui) of <2 x double> to <2 x i32>, widened to
; <4 x i32> with undef upper lanes (note the <4 x i32> return type despite the
; "2i32" in the name).
; Expected codegen recorded by the assertions below:
;  - SSE and VEX prefixes: per lane, the input and (input minus a constant
;    loaded from memory) are both converted with (v)cvttsd2si; the second result
;    is xor-ed with 0x8000000000000000 and cmovae picks between the two after a
;    (v)ucomisd. The 64-bit results are joined with (v)punpcklqdq and narrowed
;    with (v)pshufd [0,2,2,3].
;  - AVX512F and AVX512DQ prefixes: the input is treated as zmm0 and converted
;    with vcvttpd2udq zmm -> ymm, followed by vzeroupper.
;  - AVX512VL and AVX512VLDQ prefixes: a single xmm vcvttpd2udq.
define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptoui_2f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; SSE-NEXT: movapd %xmm0, %xmm2
|
|
|
|
; SSE-NEXT: subsd %xmm1, %xmm2
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm2, %rax
|
|
|
|
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rdx
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rax, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm2
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
|
|
|
; SSE-NEXT: subsd %xmm1, %xmm3
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm3, %rax
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rcx
|
|
|
|
; SSE-NEXT: ucomisd %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rax, %rcx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rcx, %xmm0
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
|
|
|
|
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptoui_2f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm2, %rax
|
|
|
|
; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rdx
|
|
|
|
; VEX-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; VEX-NEXT: vmovq %rdx, %xmm2
|
|
|
|
; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rcx
|
|
|
|
; VEX-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
|
|
|
; VEX-NEXT: retq
|
2016-08-23 22:37:35 +08:00
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptoui_2f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512F-NEXT: vzeroupper
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptoui_2f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptoui_2f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
|
|
|
%cvt = fptoui <2 x double> %a to <2 x i32>
|
|
|
|
; Lanes 0-1 of %ext are the converted values; lanes 2-3 are undef.
%ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
|
|
ret <4 x i32> %ext
|
|
|
|
}
|
|
|
|
|
2015-07-19 01:05:39 +08:00
|
|
|
define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
|
|
|
|
; SSE-LABEL: fptoui_4f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
|
|
|
; SSE-NEXT: movapd %xmm0, %xmm1
|
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rax
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rdx
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: cmovaeq %rax, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm1
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm3
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm3, %rax
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rdx
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: cmovaeq %rax, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm0
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rax
|
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: cmovbq %rax, %rcx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rcx, %xmm0
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[0,2]
|
|
|
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptoui_4f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vmovd %ecx, %xmm0
|
|
|
|
; VEX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; VEX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
|
2017-04-07 06:33:25 +08:00
|
|
|
; VEX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: retq
|
2016-08-23 22:37:35 +08:00
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-LABEL: fptoui_4f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512F-NEXT: vzeroupper
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: fptoui_4f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512VL-NEXT: vzeroupper
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptoui_4f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512VLDQ-NEXT: vzeroupper
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-07-19 01:05:39 +08:00
|
|
|
%ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
|
|
%cvt = fptoui <4 x double> %ext to <4 x i32>
|
|
|
|
ret <4 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptoui_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movapd %xmm0, %xmm2
|
|
|
|
; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
|
|
|
|
; SSE-NEXT: subsd %xmm3, %xmm0
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rcx
|
|
|
|
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm2, %rdx
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm3, %xmm2
|
|
|
|
; SSE-NEXT: cmovaeq %rcx, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
|
|
|
|
; SSE-NEXT: movaps %xmm2, %xmm4
|
|
|
|
; SSE-NEXT: subsd %xmm3, %xmm4
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm4, %rcx
|
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm2, %rdx
|
|
|
|
; SSE-NEXT: ucomisd %xmm3, %xmm2
|
|
|
|
; SSE-NEXT: cmovaeq %rcx, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm2
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
|
|
|
; SSE-NEXT: movapd %xmm1, %xmm2
|
|
|
|
; SSE-NEXT: subsd %xmm3, %xmm2
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm2, %rcx
|
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rdx
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm3, %xmm1
|
|
|
|
; SSE-NEXT: cmovaeq %rcx, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm2
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
|
|
|
|
; SSE-NEXT: movaps %xmm1, %xmm4
|
|
|
|
; SSE-NEXT: subsd %xmm3, %xmm4
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm4, %rcx
|
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rax
|
|
|
|
; SSE-NEXT: ucomisd %xmm3, %xmm1
|
|
|
|
; SSE-NEXT: cmovaeq %rcx, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm1
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
|
|
|
; SSE-NEXT: movdqa %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX1-LABEL: fptoui_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX1: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; AVX1-NEXT: xorq %rcx, %rax
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
|
|
|
|
; AVX1-NEXT: vucomisd %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; AVX1-NEXT: vmovq %rdx, %xmm3
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
|
|
|
; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm4, %rax
|
|
|
|
; AVX1-NEXT: xorq %rcx, %rax
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
|
|
|
|
; AVX1-NEXT: vucomisd %xmm1, %xmm2
|
|
|
|
; AVX1-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; AVX1-NEXT: vmovq %rdx, %xmm2
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
|
|
|
; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; AVX1-NEXT: xorq %rcx, %rax
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm0, %rdx
|
|
|
|
; AVX1-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; AVX1-NEXT: vmovq %rdx, %xmm3
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm4
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm4, %rax
|
|
|
|
; AVX1-NEXT: xorq %rcx, %rax
|
|
|
|
; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
|
|
|
|
; AVX1-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; AVX1-NEXT: cmovaeq %rax, %rcx
|
|
|
|
; AVX1-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: fptoui_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX2: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
|
|
; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
|
|
; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; AVX2-NEXT: xorq %rcx, %rax
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm2, %rdx
|
|
|
|
; AVX2-NEXT: vucomisd %xmm1, %xmm2
|
|
|
|
; AVX2-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; AVX2-NEXT: vmovq %rdx, %xmm3
|
|
|
|
; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
|
|
|
|
; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm4, %rax
|
|
|
|
; AVX2-NEXT: xorq %rcx, %rax
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm2, %rdx
|
|
|
|
; AVX2-NEXT: vucomisd %xmm1, %xmm2
|
|
|
|
; AVX2-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; AVX2-NEXT: vmovq %rdx, %xmm2
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
|
|
|
|
; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm3, %rax
|
|
|
|
; AVX2-NEXT: xorq %rcx, %rax
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm0, %rdx
|
|
|
|
; AVX2-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; AVX2-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; AVX2-NEXT: vmovq %rdx, %xmm3
|
|
|
|
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm4
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm4, %rax
|
|
|
|
; AVX2-NEXT: xorq %rcx, %rax
|
|
|
|
; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
|
|
|
|
; AVX2-NEXT: vucomisd %xmm1, %xmm0
|
|
|
|
; AVX2-NEXT: cmovaeq %rax, %rcx
|
|
|
|
; AVX2-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptoui_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptoui_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2017-01-03 13:46:18 +08:00
|
|
|
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
|
|
|
|
; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
2017-01-03 13:46:18 +08:00
|
|
|
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
2016-11-23 22:01:18 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
%cvt = fptoui <4 x double> %a to <4 x i64>
|
|
|
|
ret <4 x i64> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptoui_4f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
|
|
|
; SSE-NEXT: movapd %xmm1, %xmm3
|
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm3
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm3, %rcx
|
|
|
|
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rdx
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: cmovaeq %rcx, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm3
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
|
|
|
|
; SSE-NEXT: movaps %xmm1, %xmm4
|
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm4
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm4, %rcx
|
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rdx
|
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: cmovaeq %rcx, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm1
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: movapd %xmm0, %xmm1
|
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm1, %rcx
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rdx
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rcx, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm1
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm4
|
|
|
|
; SSE-NEXT: subsd %xmm2, %xmm4
|
|
|
|
; SSE-NEXT: cvttsd2si %xmm4, %rcx
|
|
|
|
; SSE-NEXT: xorq %rax, %rcx
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttsd2si %xmm0, %rax
|
|
|
|
; SSE-NEXT: ucomisd %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rcx, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm0
|
[x86] use a single shufps when it can save instructions
This is a tiny patch with a big pile of test changes.
This partially fixes PR27885:
https://llvm.org/bugs/show_bug.cgi?id=27885
My motivating case looks like this:
- vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
- vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
- vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+ vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
And this happens several times in the diffs. For chips with domain-crossing penalties,
the instruction count and size reduction should usually overcome any potential
domain-crossing penalty due to using an FP op in a sequence of int ops. For chips such
as recent Intel big cores and Atom, there is no domain-crossing penalty for shufps, so
using shufps is a pure win.
So the test case diffs all appear to be improvements except one test in
vector-shuffle-combining.ll where we miss an opportunity to use a shift to generate
zero elements and one test in combine-sra.ll where multiple uses prevent the expected
shuffle combining.
Differential Revision: https://reviews.llvm.org/D27692
llvm-svn: 289837
2016-12-16 02:03:38 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
|
|
|
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptoui_4f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm1, %rax
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; VEX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
|
|
|
|
; VEX-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; VEX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
|
|
|
|
; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; VEX-NEXT: vcvttsd2si %xmm0, %rax
|
|
|
|
; VEX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: vzeroupper
|
|
|
|
; VEX-NEXT: retq
|
2016-08-23 22:37:35 +08:00
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-LABEL: fptoui_4f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512F-NEXT: vzeroupper
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: fptoui_4f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512VL-NEXT: vzeroupper
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptoui_4f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512VLDQ-NEXT: vzeroupper
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
%cvt = fptoui <4 x double> %a to <4 x i32>
|
|
|
|
ret <4 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
|
|
|
;
|
|
|
|
; Float to Signed Integer
|
|
|
|
;
|
|
|
|
|
2016-08-23 23:00:52 +08:00
|
|
|
define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
|
|
|
|
; SSE-LABEL: fptosi_2f32_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-10-18 15:42:15 +08:00
|
|
|
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
|
|
|
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
2016-08-23 23:00:52 +08:00
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: fptosi_2f32_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2016-10-18 15:42:15 +08:00
|
|
|
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
2016-08-23 23:00:52 +08:00
|
|
|
; AVX-NEXT: retq
|
|
|
|
%cvt = fptosi <2 x float> %a to <2 x i32>
|
|
|
|
ret <2 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptosi_4f32_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2015-07-17 05:00:57 +08:00
|
|
|
; AVX-LABEL: fptosi_4f32_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-05-02 19:18:47 +08:00
|
|
|
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
%cvt = fptosi <4 x float> %a to <4 x i32>
|
|
|
|
ret <4 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptosi_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm1
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm0
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-12-11 03:35:39 +08:00
|
|
|
; VEX-LABEL: fptosi_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-12-11 03:35:39 +08:00
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; VEX-NEXT: vmovq %rax, %xmm1
|
|
|
|
; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; VEX-NEXT: vmovq %rax, %xmm0
|
|
|
|
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; VEX-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptosi_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: fptosi_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
2018-01-02 15:30:53 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
2018-01-02 15:30:53 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-07-18 23:56:33 +08:00
|
|
|
%shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
2015-05-02 19:18:47 +08:00
|
|
|
%cvt = fptosi <2 x float> %shuf to <2 x i64>
|
|
|
|
ret <2 x i64> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-19 01:05:39 +08:00
|
|
|
; Signed conversion in which the fptosi is applied to the whole <4 x float>
; argument (producing <4 x i64>) and the shufflevector afterwards keeps only
; result lanes 0 and 1.
define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
|
|
|
|
; SSE-LABEL: fptosi_4f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm1
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm0
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptosi_4f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; VEX-NEXT: vmovq %rax, %xmm1
|
|
|
|
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; VEX-NEXT: retq
|
2016-08-23 22:37:35 +08:00
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptosi_4f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; AVX512F-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptosi_4f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; AVX512VL-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
2016-11-23 22:01:18 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512VLDQ-NEXT: vzeroupper
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-07-19 01:05:39 +08:00
|
|
|
%cvt = fptosi <4 x float> %a to <4 x i64>
|
|
|
|
%shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
|
|
|
|
ret <2 x i64> %shuf
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Plain element-wise signed conversion of <8 x float> to <8 x i32>; no shuffle
; is involved. The SSE checks expect one cvttps2dq on each of xmm0 and xmm1,
; the AVX checks a single vcvttps2dq on ymm0.
define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptosi_8f32_to_8i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
|
|
|
|
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2015-07-17 05:00:57 +08:00
|
|
|
; AVX-LABEL: fptosi_8f32_to_8i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-05-02 19:18:47 +08:00
|
|
|
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
%cvt = fptosi <8 x float> %a to <8 x i32>
|
|
|
|
ret <8 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion of the low four lanes of an <8 x float> argument: the
; shufflevector selects lanes 0-3 first, and the resulting <4 x float> is then
; converted to <4 x i64>.
define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptosi_4f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm2
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
2018-02-28 00:59:10 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm1, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm1
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
2018-02-28 00:59:10 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm1, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm3
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm1
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX1-LABEL: fptosi_4f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX1: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: fptosi_4f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX2: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
|
|
|
; AVX2-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
|
|
|
; AVX2-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX2-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX2-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptosi_4f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptosi_4f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
2017-01-03 13:46:18 +08:00
|
|
|
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2016-11-23 22:01:18 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-07-18 23:56:33 +08:00
|
|
|
%shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
2015-05-02 19:18:47 +08:00
|
|
|
%cvt = fptosi <4 x float> %shuf to <4 x i64>
|
|
|
|
ret <4 x i64> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-19 01:05:39 +08:00
|
|
|
; Signed conversion in which all eight float lanes are converted to i64 first
; (producing <8 x i64>) and the shufflevector afterwards keeps only result
; lanes 0-3.
define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
|
|
|
|
; SSE-LABEL: fptosi_8f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm2
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
2018-02-28 00:59:10 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm1, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm1
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
2018-02-28 00:59:10 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm1, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm3
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rax, %xmm1
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
|
2015-07-19 01:05:39 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX1-LABEL: fptosi_8f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX1: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX1-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX1-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: fptosi_8f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX2: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
|
|
|
|
; AVX2-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
|
|
|
; AVX2-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
|
|
|
|
; AVX2-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX2-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; AVX2-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512F-LABEL: fptosi_8f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm1, %rdx
|
|
|
|
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
|
|
|
; AVX512F-NEXT: vcvttss2si %xmm0, %rsi
|
|
|
|
; AVX512F-NEXT: vmovq %rsi, %xmm0
|
|
|
|
; AVX512F-NEXT: vmovq %rdx, %xmm1
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: vmovq %rcx, %xmm1
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
2016-08-23 22:37:35 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-LABEL: fptosi_8f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx
|
|
|
|
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
|
|
|
; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi
|
|
|
|
; AVX512VL-NEXT: vmovq %rsi, %xmm0
|
|
|
|
; AVX512VL-NEXT: vmovq %rdx, %xmm1
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512VL-NEXT: vmovq %rcx, %xmm1
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm2
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
2017-01-03 13:46:18 +08:00
|
|
|
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-07-19 01:05:39 +08:00
|
|
|
%cvt = fptosi <8 x float> %a to <8 x i64>
|
|
|
|
%shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
|
|
ret <4 x i64> %shuf
|
|
|
|
}
|
|
|
|
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
|
|
|
; Float to Unsigned Integer
|
|
|
|
;
|
|
|
|
|
2016-11-06 15:50:25 +08:00
|
|
|
; Element-wise unsigned conversion of <2 x float> to <2 x i32>; the IR is a
; single fptoui with no shuffle.
define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
|
|
|
|
; SSE-LABEL: fptoui_2f32_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
|
|
|
; SSE-NEXT: subss %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: cvttss2si %xmm1, %rax
|
|
|
|
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rdx
|
|
|
|
; SSE-NEXT: ucomiss %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rax, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm1
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
|
|
|
; SSE-NEXT: subss %xmm2, %xmm3
|
|
|
|
; SSE-NEXT: cvttss2si %xmm3, %rax
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rcx
|
|
|
|
; SSE-NEXT: ucomiss %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rax, %rcx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rcx, %xmm0
|
2016-11-06 15:50:25 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptoui_2f32_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rdx
|
|
|
|
; VEX-NEXT: vucomiss %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; VEX-NEXT: vmovq %rdx, %xmm2
|
|
|
|
; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm3, %rax
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vucomiss %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rcx
|
|
|
|
; VEX-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; VEX-NEXT: retq
|
2016-11-06 15:50:25 +08:00
|
|
|
;
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512F-LABEL: fptoui_2f32_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
|
|
|
|
; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512F-NEXT: vzeroupper
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: fptoui_2f32_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
|
|
|
|
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
|
|
|
|
; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-09 15:48:51 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
|
|
|
|
; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2016-11-06 15:50:25 +08:00
|
|
|
%cvt = fptoui <2 x float> %a to <2 x i32>
|
|
|
|
ret <2 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Element-wise unsigned conversion of <4 x float> to <4 x i32>; the IR is a
; single fptoui with no shuffle.
define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptoui_4f32_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
2018-02-28 00:59:10 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm1, %rax
|
|
|
|
; SSE-NEXT: movd %eax, %xmm1
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm2
|
2018-02-28 00:59:10 +08:00
|
|
|
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm2, %rax
|
|
|
|
; SSE-NEXT: movd %eax, %xmm2
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
|
|
|
; SSE-NEXT: movd %eax, %xmm1
|
[X86][SSE] Change BUILD_VECTOR interleaving ordering to improve coalescing/combine opportunities
We currently generate BUILD_VECTOR as a tree of UNPCKL shuffles of the same type:
e.g. for v4f32:
Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
: unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
The issue is because we are not placing sequential vector elements together early enough, we fail to recognise many combinable patterns - consecutive scalar loads, extractions etc.
Instead, this patch unpacks progressively larger sequential vector elements together:
e.g. for v4f32:
Step 1: unpcklps 0, 2 ==> X: <?, ?, 1, 0>
: unpcklps 1, 3 ==> Y: <?, ?, 3, 2>
Step 2: unpcklpd X, Y ==> <3, 2, 1, 0>
This does mean that we are creating UNPCKL shuffle of different value types, but the relevant combines that benefit from this are quite capable of handling the additional BITCASTs that are now included in the shuffle tree.
Differential Revision: https://reviews.llvm.org/D33864
llvm-svn: 304688
2017-06-05 04:12:04 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
2016-08-23 22:37:35 +08:00
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rax
|
|
|
|
; SSE-NEXT: movd %eax, %xmm0
|
|
|
|
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
[X86][SSE] Change BUILD_VECTOR interleaving ordering to improve coalescing/combine opportunities
We currently generate BUILD_VECTOR as a tree of UNPCKL shuffles of the same type:
e.g. for v4f32:
Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
: unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
The issue is because we are not placing sequential vector elements together early enough, we fail to recognise many combinable patterns - consecutive scalar loads, extractions etc.
Instead, this patch unpacks progressively larger sequential vector elements together:
e.g. for v4f32:
Step 1: unpcklps 0, 2 ==> X: <?, ?, 1, 0>
: unpcklps 1, 3 ==> Y: <?, ?, 3, 2>
Step 2: unpcklpd X, Y ==> <3, 2, 1, 0>
This does mean that we are creating UNPCKL shuffle of different value types, but the relevant combines that benefit from this are quite capable of handling the additional BITCASTs that are now included in the shuffle tree.
Differential Revision: https://reviews.llvm.org/D33864
llvm-svn: 304688
2017-06-05 04:12:04 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptoui_4f32_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm1, %rax
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vmovd %ecx, %xmm1
|
|
|
|
; VEX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
|
|
|
|
; VEX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; VEX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
|
|
|
|
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rax
|
|
|
|
; VEX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: retq
|
2016-08-23 22:37:35 +08:00
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-LABEL: fptoui_4f32_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512F-NEXT: vzeroupper
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: fptoui_4f32_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
2017-03-03 17:03:24 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-11-23 06:04:50 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-11-08 14:58:53 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
%cvt = fptoui <4 x float> %a to <4 x i32>
|
|
|
|
ret <4 x i32> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of the low two lanes of a <4 x float> argument: the
; shufflevector selects lanes 0 and 1 first, and the resulting <2 x float> is
; then converted to <2 x i64>.
define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-LABEL: fptoui_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
|
|
|
; SSE-NEXT: subss %xmm2, %xmm1
|
|
|
|
; SSE-NEXT: cvttss2si %xmm1, %rax
|
|
|
|
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rdx
|
|
|
|
; SSE-NEXT: ucomiss %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rax, %rdx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rdx, %xmm1
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
|
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
|
|
|
; SSE-NEXT: subss %xmm2, %xmm3
|
|
|
|
; SSE-NEXT: cvttss2si %xmm3, %rax
|
|
|
|
; SSE-NEXT: xorq %rcx, %rax
|
|
|
|
; SSE-NEXT: cvttss2si %xmm0, %rcx
|
|
|
|
; SSE-NEXT: ucomiss %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: cmovaeq %rax, %rcx
|
2017-04-26 15:08:44 +08:00
|
|
|
; SSE-NEXT: movq %rcx, %xmm0
|
2015-07-19 00:53:51 +08:00
|
|
|
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
2015-05-02 19:18:47 +08:00
|
|
|
;
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-LABEL: fptoui_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; VEX: # %bb.0:
|
2016-11-23 06:04:50 +08:00
|
|
|
; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
|
|
; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm2, %rax
|
|
|
|
; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rdx
|
|
|
|
; VEX-NEXT: vucomiss %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rdx
|
|
|
|
; VEX-NEXT: vmovq %rdx, %xmm2
|
|
|
|
; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm3, %rax
|
|
|
|
; VEX-NEXT: xorq %rcx, %rax
|
|
|
|
; VEX-NEXT: vcvttss2si %xmm0, %rcx
|
|
|
|
; VEX-NEXT: vucomiss %xmm1, %xmm0
|
|
|
|
; VEX-NEXT: cmovaeq %rax, %rcx
|
|
|
|
; VEX-NEXT: vmovq %rcx, %xmm0
|
|
|
|
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
|
|
|
|
; VEX-NEXT: retq
|
2016-08-23 22:37:35 +08:00
|
|
|
;
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512F-LABEL: fptoui_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512F: # %bb.0:
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
|
|
|
|
; AVX512F-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512F-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: fptoui_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VL: # %bb.0:
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm1
|
|
|
|
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
|
|
|
|
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
|
|
|
|
; AVX512VL-NEXT: vmovq %rax, %xmm0
|
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512DQ: # %bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
2018-01-02 15:30:53 +08:00
|
|
|
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
|
2018-02-01 06:04:26 +08:00
|
|
|
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
|
2018-01-02 15:30:53 +08:00
|
|
|
; AVX512DQ-NEXT: vzeroupper
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512DQ-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512VLDQ: # %bb.0:
|
2016-12-11 03:35:39 +08:00
|
|
|
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
|
|
|
|
; AVX512VLDQ-NEXT: retq
|
2015-07-18 23:56:33 +08:00
|
|
|
%shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
2015-05-02 19:18:47 +08:00
|
|
|
%cvt = fptoui <2 x float> %shuf to <2 x i64>
|
|
|
|
ret <2 x i64> %cvt
|
|
|
|
}
|
|
|
|
|
2015-07-19 01:05:39 +08:00
|
|
|
; Unsigned float-to-i64 conversion where the whole <4 x float> input is
; converted to <4 x i64> and only result lanes 0 and 1 are kept by the
; trailing shufflevector. The expectations below show that targets without a
; packed unsigned 64-bit conversion emit exactly two scalar conversions (one
; per surviving lane), while the DQ-enabled targets use a single vcvttps2uqq.
define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_2i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: subss %xmm2, %xmm1
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: subss %xmm2, %xmm3
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: ucomiss %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rax, %rcx
; SSE-NEXT: movq %rcx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_4f32_to_2i64:
; VEX: # %bb.0:
; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3
; VEX-NEXT: vcvttss2si %xmm3, %rax
; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; VEX-NEXT: xorq %rcx, %rax
; VEX-NEXT: vcvttss2si %xmm1, %rdx
; VEX-NEXT: vucomiss %xmm2, %xmm1
; VEX-NEXT: cmovaeq %rax, %rdx
; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1
; VEX-NEXT: vcvttss2si %xmm1, %rax
; VEX-NEXT: xorq %rcx, %rax
; VEX-NEXT: vcvttss2si %xmm0, %rcx
; VEX-NEXT: vucomiss %xmm2, %xmm0
; VEX-NEXT: cmovaeq %rax, %rcx
; VEX-NEXT: vmovq %rcx, %xmm0
; VEX-NEXT: vmovq %rdx, %xmm1
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f32_to_2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512F-NEXT: vmovq %rcx, %xmm0
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_4f32_to_2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512VL-NEXT: vmovq %rcx, %xmm0
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512VLDQ-NEXT: vzeroupper
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of a full <8 x float> vector to <8 x i32>, with no
; shuffle on either side. The expectations below show the pre-AVX512 targets
; converting one lane at a time through a 64-bit cvttss2si and rebuilding the
; vector, while every AVX512 configuration uses a single vcvttps2udq (widened
; to zmm when VL is not available).
define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm2[1],xmm3[1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_8f32_to_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT: vcvttss2si %xmm2, %rax
; AVX1-NEXT: vcvttss2si %xmm1, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm2
; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX1-NEXT: vcvttss2si %xmm1, %rax
; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vcvttss2si %xmm2, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm2
; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT: vcvttss2si %xmm0, %rax
; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_8f32_to_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vcvttss2si %xmm1, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm2
; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm2
; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_8f32_to_8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_8f32_to_8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned float-to-i64 conversion of the low half of an <8 x float> input:
; the shufflevector first extracts lanes 0-3, and those four floats are then
; converted to <4 x i64>. The expectations below show four scalar conversions
; on targets without DQ, and a single vcvttps2uqq on the DQ-enabled targets.
define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: cvttss2si %xmm2, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_4f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm2, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm2
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm2
; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm3, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm3
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm0
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: vucomiss %xmm1, %xmm0
; AVX1-NEXT: cmovaeq %rax, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_4f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm2, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm2
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm3, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm3
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm0
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: vucomiss %xmm1, %xmm0
; AVX2-NEXT: cmovaeq %rax, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2usi %xmm2, %rax
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_4f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
; AVX512VLDQ-NEXT: retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}
|
2015-05-02 21:04:07 +08:00
|
|
|
|
2015-07-19 01:05:39 +08:00
|
|
|
; Unsigned float-to-i64 conversion where the whole <8 x float> input is
; converted to <8 x i64> and only result lanes 0-3 are kept by the trailing
; shufflevector (the mirror of the variant that shuffles before converting).
; The expectations below show four scalar conversions on targets without DQ,
; and a single zmm-wide vcvttps2uqq on both DQ-enabled configurations.
define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: cvttss2si %xmm2, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_8f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm2, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm2
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm2
; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm3, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm3
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rdx
; AVX1-NEXT: vucomiss %xmm1, %xmm0
; AVX1-NEXT: cmovaeq %rax, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: vucomiss %xmm1, %xmm0
; AVX1-NEXT: cmovaeq %rax, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_8f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm2, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm2
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm3, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm3
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rdx
; AVX2-NEXT: vucomiss %xmm1, %xmm0
; AVX2-NEXT: cmovaeq %rax, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: xorq %rcx, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: vucomiss %xmm1, %xmm0
; AVX2-NEXT: cmovaeq %rax, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_8f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi
; AVX512F-NEXT: vmovq %rsi, %xmm0
; AVX512F-NEXT: vmovq %rdx, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vmovq %rcx, %xmm1
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_8f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi
; AVX512VL-NEXT: vmovq %rsi, %xmm0
; AVX512VL-NEXT: vmovq %rdx, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vmovq %rcx, %xmm1
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}
|
|
|
|
|
2015-05-02 21:04:07 +08:00
|
|
|
;
; Constant Folding
;
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Constant-folding check: a signed conversion of the literal vector
; <1.0, -1.0> to <2 x i64> must become a single constant load. The -1 lane is
; printed by the assembler comment as the unsigned 64-bit value
; 18446744073709551615.
define <2 x i64> @fptosi_2f64_to_2i64_const() {
; SSE-LABEL: fptosi_2f64_to_2i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
  ret <2 x i64> %cvt
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion of the constant vector <-1.0, 1.0> to <2 x i32>, widened
; to <4 x i32> with undef upper lanes. Expected output is one constant load
; whose upper two elements are shown as 'u'.
define <4 x i32> @fptosi_2f64_to_2i32_const() {
; SSE-LABEL: fptosi_2f64_to_2i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT: retq
  %conv = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
  %widened = shufflevector <2 x i32> %conv, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %widened
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion of the constant vector <1.0, -1.0, 2.0, -3.0> to
; <4 x i64>. The SSE run expects two 128-bit constant loads (xmm0, xmm1);
; the AVX runs expect a single 256-bit constant load into ymm0.
define <4 x i64> @fptosi_4f64_to_4i64_const() {
; SSE-LABEL: fptosi_4f64_to_4i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT: retq
  %conv = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
  ret <4 x i64> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion of the constant vector <-1.0, 1.0, -2.0, 3.0> to
; <4 x i32>. Expected output is a single 128-bit constant load; negative
; lanes are printed in unsigned 32-bit form.
define <4 x i32> @fptosi_4f64_to_4i32_const() {
; SSE-LABEL: fptosi_4f64_to_4i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT: retq
  %conv = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
  ret <4 x i32> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of the constant vector <2.0, 4.0> to <2 x i64>.
; Expected output is a single constant load of [2,4].
define <2 x i64> @fptoui_2f64_to_2i64_const() {
; SSE-LABEL: fptoui_2f64_to_2i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT: retq
  %conv = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
  ret <2 x i64> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of the constant vector <2.0, 4.0> to <2 x i32>, widened
; to <4 x i32> with undef upper lanes. Expected output is one constant load.
; NOTE(review): the %a argument is never referenced in the body; it is kept
; so the function signature stays as it was.
define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT: retq
  %conv = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
  %widened = shufflevector <2 x i32> %conv, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %widened
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of the constant vector <2.0, 4.0, 6.0, 8.0> to
; <4 x i64>. The SSE run expects two 128-bit constant loads; the AVX runs
; expect one 256-bit constant load.
; NOTE(review): the %a argument is never referenced in the body; it is kept
; so the function signature stays as it was.
define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT: retq
  %conv = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
  ret <4 x i64> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of the constant vector <2.0, 4.0, 6.0, 8.0> to
; <4 x i32>. Expected output is a single 128-bit constant load.
; NOTE(review): the %a argument is never referenced in the body; it is kept
; so the function signature stays as it was.
define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT: retq
  %conv = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
  ret <4 x i32> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion of the constant vector <1.0, -1.0, 2.0, 3.0> (float) to
; <4 x i32>. Expected output is a single constant load; the -1 lane is
; printed as 4294967295.
define <4 x i32> @fptosi_4f32_to_4i32_const() {
; SSE-LABEL: fptosi_4f32_to_4i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT: retq
  %conv = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
  ret <4 x i32> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion of the constant vector <1.0, -1.0, 2.0, 3.0> (float) to
; <4 x i64>. The SSE run expects two 128-bit constant loads; the AVX runs
; expect one 256-bit constant load.
define <4 x i64> @fptosi_4f32_to_4i64_const() {
; SSE-LABEL: fptosi_4f32_to_4i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT: retq
  %conv = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
  ret <4 x i64> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Signed conversion of an eight-element constant float vector to <8 x i32>.
; The SSE run expects two 128-bit constant loads; the AVX runs expect one
; 256-bit constant load. Negative lanes are printed in unsigned 32-bit form.
; NOTE(review): the %a argument is never referenced in the body; it is kept
; so the function signature stays as it was.
define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT: retq
  %conv = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
  ret <8 x i32> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of the constant vector <1.0, 2.0, 4.0, 6.0> (float) to
; <4 x i32>. Expected output is a single constant load.
; NOTE(review): the %a argument is never referenced in the body; it is kept
; so the function signature stays as it was.
define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT: retq
  %conv = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
  ret <4 x i32> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of the constant vector <1.0, 2.0, 4.0, 8.0> (float) to
; <4 x i64>. The SSE run expects two 128-bit constant loads; the AVX runs
; expect one 256-bit constant load.
define <4 x i64> @fptoui_4f32_to_4i64_const() {
; SSE-LABEL: fptoui_4f32_to_4i64_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT: retq
  %conv = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
  ret <4 x i64> %conv
}
|
|
|
|
|
2015-07-17 05:00:57 +08:00
|
|
|
; Unsigned conversion of an eight-element constant float vector to <8 x i32>.
; The SSE run expects two 128-bit constant loads; the AVX runs expect one
; 256-bit constant load.
; NOTE(review): the %a argument is never referenced in the body; it is kept
; so the function signature stays as it was.
define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32_const:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT: retq
  %conv = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
  ret <8 x i32> %conv
}
|
2016-10-07 03:29:25 +08:00
|
|
|
|
|
|
|
;
|
|
|
|
; Special Cases
|
|
|
|
;
|
|
|
|
|
|
|
|
; Signed conversion from a <2 x half> argument to <2 x i32>, then widened to
; <4 x i32>; the shuffle takes lanes 2 and 3 from zeroinitializer, so the
; upper half of the result is zero.
; The SSE and VEX expectations call __gnu_f2h_ieee / __gnu_h2f_ieee for each
; lane, while the AVX512 expectations use vcvtps2ph / vcvtph2ps instead.
define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
; SSE-LABEL: fptosi_2f16_to_4i32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rax
; SSE-NEXT: movss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill
; SSE-NEXT: callq __gnu_f2h_ieee
; SSE-NEXT: movzwl %ax, %edi
; SSE-NEXT: callq __gnu_h2f_ieee
; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
; SSE-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: callq __gnu_f2h_ieee
; SSE-NEXT: movzwl %ax, %edi
; SSE-NEXT: callq __gnu_h2f_ieee
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: cvttss2si (%rsp), %rax # 4-byte Folded Reload
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f16_to_4i32:
; VEX: # %bb.0:
; VEX-NEXT: pushq %rax
; VEX-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill
; VEX-NEXT: callq __gnu_f2h_ieee
; VEX-NEXT: movzwl %ax, %edi
; VEX-NEXT: callq __gnu_h2f_ieee
; VEX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill
; VEX-NEXT: vmovss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; VEX-NEXT: # xmm0 = mem[0],zero,zero,zero
; VEX-NEXT: callq __gnu_f2h_ieee
; VEX-NEXT: movzwl %ax, %edi
; VEX-NEXT: callq __gnu_h2f_ieee
; VEX-NEXT: vcvttss2si %xmm0, %rax
; VEX-NEXT: vmovq %rax, %xmm0
; VEX-NEXT: vcvttss2si (%rsp), %rax # 4-byte Folded Reload
; VEX-NEXT: vmovq %rax, %xmm1
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; VEX-NEXT: popq %rax
; VEX-NEXT: retq
;
; AVX512-LABEL: fptosi_2f16_to_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vcvttss2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vcvttss2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX512-NEXT: retq
  %conv = fptosi <2 x half> %a to <2 x i32>
  %zext_hi = shufflevector <2 x i32> %conv, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %zext_hi
}
|
|
|
|
|
|
|
|
; Signed conversion from a <2 x x86_fp80> argument to <2 x i32>, then widened
; to <4 x i32>; the shuffle takes lanes 2 and 3 from zeroinitializer, so the
; upper half of the result is zero.
; Both expectations load the operands with fldt. The SSE run stores each
; lane with fistpll bracketed by fnstcw/fldcw control-word updates (0xC7F),
; whereas the AVX runs store with fisttpll and no control-word changes.
define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
; SSE-LABEL: fptosi_2f80_to_4i32:
; SSE: # %bb.0:
; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE-NEXT: fistpll -{{[0-9]+}}(%rsp)
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE-NEXT: fistpll -{{[0-9]+}}(%rsp)
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f80_to_4i32:
; AVX: # %bb.0:
; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX-NEXT: retq
  %conv = fptosi <2 x x86_fp80> %a to <2 x i32>
  %zext_hi = shufflevector <2 x i32> %conv, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %zext_hi
}
|
|
|
|
|
|
|
|
; Signed conversion from a <2 x fp128> argument to <2 x i32>, then widened to
; <4 x i32>; the shuffle takes lanes 2 and 3 from zeroinitializer, so the
; upper half of the result is zero.
; Both expectations make two calls to the __fixtfdi libcall, spilling the
; first result to the stack across the second call before combining them.
define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
; SSE-LABEL: fptosi_2f128_to_4i32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: subq $24, %rsp
; SSE-NEXT: movq %rsi, %r14
; SSE-NEXT: movq %rdi, %rbx
; SSE-NEXT: movq %rdx, %rdi
; SSE-NEXT: movq %rcx, %rsi
; SSE-NEXT: callq __fixtfdi
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT: movq %rbx, %rdi
; SSE-NEXT: movq %r14, %rsi
; SSE-NEXT: callq __fixtfdi
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSE-NEXT: addq $24, %rsp
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f128_to_4i32:
; AVX: # %bb.0:
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: movq %rsi, %r14
; AVX-NEXT: movq %rdi, %rbx
; AVX-NEXT: movq %rdx, %rdi
; AVX-NEXT: movq %rcx, %rsi
; AVX-NEXT: callq __fixtfdi
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: movq %rbx, %rdi
; AVX-NEXT: movq %r14, %rsi
; AVX-NEXT: callq __fixtfdi
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %r14
; AVX-NEXT: retq
  %conv = fptosi <2 x fp128> %a to <2 x i32>
  %zext_hi = shufflevector <2 x i32> %conv, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %zext_hi
}
|