; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL

; Splat: mask <0,0,0,0> copies element 0 of %a into every lane; no lane reads %b.
define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0000:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2015-01-31 22:09:36 +08:00
|
|
|
; Single-input shuffle of %a with mask <0,0,0,1>; no lane reads %b.
define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0001:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Single-input shuffle of %a with mask <0,0,2,0>; no lane reads %b.
define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0020:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Single-input shuffle of %a with mask <0,3,0,0>; no lane reads %b.
define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0300:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Single-input shuffle of %a with mask <1,0,0,0>; no lane reads %b.
define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_1000:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Single-input shuffle of %a with mask <2,2,0,0>; no lane reads %b.
define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_2200:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2017-02-11 03:51:47 +08:00
|
|
|
; Splat: mask <2,2,2,2> copies element 2 of %a into every lane; no lane reads %b.
define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2222:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2222:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_2222:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Same <2,2,2,2> splat as shuffle_v4f64_2222, but the <4 x i64> arguments are
; bitcast to <4 x double> before the shuffle.
define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4f64_2222_bc:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2222_bc:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_2222_bc:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512VL-NEXT: retq
  %tmp0 = bitcast <4 x i64> %a to <4 x double>
  %tmp1 = bitcast <4 x i64> %b to <4 x double>
  %shuffle = shufflevector <4 x double> %tmp0, <4 x double> %tmp1, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Single-input shuffle of %a with mask <3,3,3,0>; no lane reads %b.
define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3330:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Full reversal of %a: mask <3,2,1,0>; no lane reads %b.
define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3210:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-15 19:01:40 +08:00
|
|
|
; Single-input shuffle of %a with mask <0,0,2,3>; every index stays within its
; own 128-bit half. No lane reads %b.
define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2015-01-31 22:09:36 +08:00
|
|
|
; Duplicate the even elements of %a: mask <0,0,2,2>; no lane reads %b.
define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL: # BB#0:
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Same <0,0,2,2> mask as shuffle_v4f64_0022, but the first operand is loaded
; from %ptr; the expected output uses a memory source operand.
define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64mem_0022:
; ALL: # BB#0:
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; ALL-NEXT: retq
  %a = load <4 x double>, <4 x double>* %ptr
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-15 19:01:40 +08:00
|
|
|
; Swap the two elements inside each 128-bit half of %a: mask <1,0,3,2>.
define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-15 19:01:40 +08:00
|
|
|
; Duplicate the odd elements of %a: mask <1,1,3,3>; no lane reads %b.
define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-15 19:01:40 +08:00
|
|
|
; Swap elements 0 and 1 of %a and keep elements 2 and 3: mask <1,0,2,3>.
define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-15 19:01:40 +08:00
|
|
|
; Single-input shuffle of %a with mask <1,0,2,2>; every index stays within its
; own 128-bit half. No lane reads %b.
define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2015-01-31 22:09:36 +08:00
|
|
|
; Two-input shuffle, mask <0,4,2,3>: lane 1 takes element 0 of %b, all other
; lanes keep the matching element of %a.
define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0423:
; ALL: # BB#0:
; ALL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2015-01-31 22:09:36 +08:00
|
|
|
; Two-input shuffle, mask <0,4,6,2>: result is a[0], b[0], b[2], a[2].
define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL: # BB#0:
; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-15 19:01:40 +08:00
|
|
|
; Interleave the even elements of %a and %b: mask <0,4,2,6> gives
; a[0], b[0], a[2], b[2].
define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-16 01:42:00 +08:00
|
|
|
; Interleave the odd elements of %a and %b: mask <1,5,3,7> gives
; a[1], b[1], a[3], b[3].
define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL: # BB#0:
; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-16 01:42:00 +08:00
|
|
|
; Operand-swapped form of shuffle_v4f64_0426: mask <4,0,6,2> gives
; b[0], a[0], b[2], a[2].
define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-16 01:42:00 +08:00
|
|
|
; Operand-swapped form of shuffle_v4f64_1537: mask <5,1,7,3> gives
; b[1], a[1], b[3], a[3].
define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL: # BB#0:
; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-15 19:01:40 +08:00
|
|
|
; Two-input shuffle, mask <5,1,6,3>: result is b[1], a[1], b[2], a[3].
define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL: # BB#0:
; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}
|
2014-08-14 20:13:59 +08:00
|
|
|
|
2014-09-21 19:12:19 +08:00
|
|
|
; Lane-preserving blend, mask <0,5,2,7>: even lanes from %a, odd lanes from %b.
define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Lane-preserving blend, mask <4,1,6,3>: even lanes from %b, odd lanes from %a.
define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Concatenate the low halves: mask <0,1,4,5> gives a[0], a[1], b[0], b[1].
define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0145:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Concatenate the low halves in swapped order: mask <4,5,0,1> gives
; b[0], b[1], a[0], a[1].
define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4501:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Lane-preserving blend, mask <0,1,6,7>: low half from %a, high half from %b.
define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2014-11-21 20:17:50 +08:00
|
|
|
; Two-input shuffle, mask <1,0,5,4>: result is a[1], a[0], b[1], b[0].
define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1054:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Two-input shuffle, mask <3,2,5,4>: result is a[3], a[2], b[1], b[0].
define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_3254:
; ALL: # BB#0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Two-input shuffle, mask <3,2,7,6>: result is a[3], a[2], b[3], b[2].
define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_3276:
; ALL: # BB#0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Two-input shuffle, mask <1,0,7,6>: result is a[1], a[0], b[3], b[2].
define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1076:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2014-11-22 13:44:43 +08:00
|
|
|
; Interleave the low halves of %a and %b across the whole vector:
; mask <0,4,1,5> gives a[0], b[0], a[1], b[1].
define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0415:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovapd {{.*#+}} ymm2 = [0,4,1,5]
; AVX512VL-NEXT: vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2015-02-15 23:07:45 +08:00
|
|
|
; Mask <undef,0,6,2>: lane 0 is undefined; the remaining lanes are
; a[0], b[2], a[2].
define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_u062:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2015-12-23 21:10:07 +08:00
|
|
|
; Mask <1,5,undef,undef>: the low two lanes are a[1], b[1]; the upper two lanes
; are undefined, and the expected code uses only xmm registers.
define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_15uu:
; ALL: # BB#0:
; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2015-11-18 17:39:38 +08:00
|
|
|
; Mask <1,1,undef,undef>: the low two lanes are both a[1]; the upper two lanes
; are undefined, and the expected code uses only xmm registers.
define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_11uu:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Mask <2,2,undef,undef>: the low two lanes are both a[2] (taken from the upper
; 128-bit half of %a); the upper two lanes are undefined.
define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_22uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_22uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_22uu:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}
|
|
|
|
|
|
|
|
; Splat: mask <3,3,3,3> copies element 3 of %a into every lane; no lane reads %b.
define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3333:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3333:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_3333:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2017-03-21 23:12:53 +08:00
|
|
|
; Shuffle of %a with a constant vector whose element 0 is 0.0. Mask <0,4,3,4>
; gives a[0], 0.0, a[3], 0.0. The %b argument is not referenced.
define <4 x double> @shuffle_v4f64_0z3z(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0z3z:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0z3z:
; AVX2: # BB#0:
; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_0z3z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double undef, double undef, double undef>, <4 x i32> <i32 0, i32 4, i32 3, i32 4>
  ret <4 x double> %shuffle
}
|
|
|
|
|
2017-03-21 21:30:40 +08:00
|
|
|
; Shuffle of %a with a constant vector whose element 0 is 0.0. Mask <1,4,2,4>
; gives a[1], 0.0, a[2], 0.0. The %b argument is not referenced.
define <4 x double> @shuffle_v4f64_1z2z(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1z2z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_1z2z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4f64_1z2z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX512VL-NEXT: retq
  %1 = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double undef, double undef, double undef>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
  ret <4 x double> %1
}
|
|
|
|
|
2015-01-31 22:09:36 +08:00
|
|
|
; Integer (<4 x i64>) splat: mask <0,0,0,0> copies element 0 of %a into every
; lane; no lane reads %b.
define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0000:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
2015-01-31 22:09:36 +08:00
|
|
|
; Integer (<4 x i64>) single-input shuffle of %a with mask <0,0,0,1>; no lane
; reads %b.
define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0001:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Single-input shuffle of %a with mask <0,0,2,0>; lane 2 crosses from the
; upper 128-bit half of %a. %b is unused.
define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0020:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Single-input shuffle of %a with mask <0,1,1,2>: the low half is kept and the
; high half becomes elements 1 and 2. %b is unused.
define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0112:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Single-input shuffle of %a with mask <0,3,0,0>; lane 1 pulls element 3 down
; from the upper half and lanes 2-3 pull element 0 up. %b is unused.
define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0300:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Single-input shuffle of %a with mask <1,0,0,0>: only elements from the low
; 128-bit half of %a are referenced. %b is unused.
define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1000:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Single-input shuffle of %a with mask <2,2,0,0>: the two 128-bit halves swap
; and each half duplicates its even element. %b is unused.
define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_2200:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Single-input shuffle of %a with mask <3,3,3,0>: lanes 0-2 repeat element 3,
; lane 3 takes element 0. %b is unused.
define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3330:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Full element reversal of %a (mask <3,2,1,0>). %b is unused.
define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3210:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3210:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3210:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
2015-01-31 22:09:36 +08:00
|
|
|
; Two-input shuffle, mask <0,1,2,4>: lanes 0-2 are %a in place, lane 3 is
; element 0 of %b (mask indices 4-7 address %b).
define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0124:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0124:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0124:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i64> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Two-input shuffle, mask <0,1,4,2>: low half is %a in place, lane 2 is
; element 0 of %b, lane 3 is element 2 of %a.
define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0142:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0142:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i64> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Two-input shuffle, mask <0,4,1,2>: element 0 of %b is inserted at lane 1 and
; elements 1-2 of %a shift up into lanes 2-3.
define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0412:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0412:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Two-input shuffle, mask <4,0,1,2>: lane 0 is element 0 of %b, followed by
; elements 0-2 of %a shifted up by one lane.
define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_4012:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Two-input shuffle, mask <0,1,4,5>: concatenates the low 128-bit half of %a
; with the low 128-bit half of %b. One shared check block covers every RUN line.
define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_0145:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Two-input shuffle, mask <0,4,5,1>: lanes 0 and 3 are elements 0 and 1 of %a,
; lanes 1 and 2 are elements 0 and 1 of %b.
define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0451:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0451:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Two-input shuffle, mask <4,5,0,1>: the low 128-bit half of %b followed by the
; low 128-bit half of %a. One shared check block covers every RUN line.
define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_4501:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}
|
2014-09-21 19:07:41 +08:00
|
|
|
|
2014-08-14 20:13:59 +08:00
|
|
|
; Two-input shuffle, mask <4,0,1,5>: lanes 0 and 3 are elements 0 and 1 of %b,
; lanes 1 and 2 are elements 0 and 1 of %a.
define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4015:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_4015:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}
|
[x86] Start fixing a really subtle and terrible form of miscompile in
these DAG combines.
The DAG auto-CSE thing is truly terrible. Due to it, when RAUW-ing
a node with its operand, you can cause its uses to CSE to itself, which
then causes their uses to become your uses which causes them to be
picked up by the RAUW. For nodes that are determined to be "no-ops",
this is "fine". But if the RAUW is one of several steps to enact
a transformation, this causes the DAG to really silently eat and discard
nodes that you would never expect. It took days for me to actually
pinpoint a test case triggering this and a really frustrating amount of
time to even comprehend the bug because I never even thought about the
ability of RAUW to iteratively consume nodes due to CSE-ing them into
itself.
To fix this, we have to build up a brand-new chain of operations any
time we are combining across (potentially) intervening nodes. But once
the logic is added to do this, another issue surfaces: CombineTo eagerly
deletes the one node combined, *but no others*. This is... really
frustrating. If deleting it makes its operands become dead, those
operand nodes often won't go onto the worklist in the
order you would want -- they're already on it and not near the top. That
means things higher on the worklist will get combined prior to these
dead nodes being GCed out of the worklist, and if the chain is long, the
immediate users won't be enough to re-detect where the root of the chain
is that became single-use again after deleting the dead nodes. The
better way to do this is to never immediately delete nodes, and instead
to just enqueue them so we can recursively delete them. The
combined-from node is typically not on the worklist anyways by virtue of
having been popped off.... But that in turn breaks other tests that
*require* CombineTo to delete unused nodes. :: sigh ::
Fortunately, there is a better way. This whole routine should have been
returning the replacement rather than using CombineTo which is quite
hacky. Switch to that, and all the pieces fall together.
I suspect the same kind of miscompile is possible in the half-shuffle
folding code, and potentially the recursive folding code. I'll be
switching those over to a pattern more like this one for safety's sake
even though I don't immediately have any test cases for them. Note that
the only way I got a test case for this instance was with *heavily* DAG
combined 256-bit shuffle sequences generated by my fuzzer. ;]
llvm-svn: 216319
2014-08-23 18:25:15 +08:00
|
|
|
|
2014-09-27 01:11:02 +08:00
|
|
|
; Two-input shuffle, mask <2,undef,3,5>: lanes 0 and 2 are elements 2 and 3 of
; %a, lane 3 is element 1 of %b, and lane 1 is undef ("u" in the name).
define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2u35:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_2u35:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Two-input shuffle, mask <1,2,5,1>: lanes 0, 1 and 3 are elements 1, 2 and 1
; of %a; lane 2 is element 1 of %b.
define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1251:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1251:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1251:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
2014-11-21 20:17:50 +08:00
|
|
|
; Two-input shuffle, mask <1,0,5,4>: the low halves of %a and %b are
; concatenated and the two elements inside each half are swapped.
define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1054:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1054:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Two-input shuffle, mask <3,2,5,4>: the high half of %a and the low half of
; %b are concatenated and the two elements inside each half are swapped.
define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3254:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3254:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Two-input shuffle, mask <3,2,7,6>: the high halves of %a and %b are
; concatenated and the two elements inside each half are swapped.
define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3276:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_3276:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Two-input shuffle, mask <1,0,7,6>: the low half of %a and the high half of
; %b stay in their halves, with the two elements inside each half swapped.
define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1076:
; AVX1: # BB#0:
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1076:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
2014-11-22 13:44:43 +08:00
|
|
|
; Two-input shuffle, mask <0,4,1,5>: interleaves elements 0 and 1 of %a with
; elements 0 and 1 of %b across the full 256-bit result.
define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0415:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0415:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
2015-02-15 21:19:52 +08:00
|
|
|
; Shuffle against a zero vector: the first operand is zeroinitializer and %a is
; the second, so with mask <0,4,0,6> lanes 0 and 2 are zero ("z" in the name)
; and lanes 1 and 3 are elements 0 and 2 of %a.
define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_z4z6:
; AVX2: # BB#0:
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_z4z6:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
|
|
|
; Shuffle against a zero vector: the first operand is zeroinitializer and %a is
; the second, so with mask <5,0,undef,0> lane 0 is element 1 of %a, lanes 1 and
; 3 are zero ("z"), and lane 2 is undef ("u").
define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_5zuz:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_5zuz:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_5zuz:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
2015-02-15 23:07:45 +08:00
|
|
|
; Two-input shuffle, mask <4,0,undef,2>: lane 0 is element 0 of %b, lanes 1 and
; 3 are elements 0 and 2 of %a, and lane 2 is undef ("u" in the name).
define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_40u2:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_40u2:
; AVX2: # BB#0:
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_40u2:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
  ret <4 x i64> %shuffle
}
|
|
|
|
|
2015-12-23 21:10:07 +08:00
|
|
|
; Mask <1,5,undef,undef> over (%a, %b): lanes 0-1 are %a[1], %b[1]; the upper
; two lanes are undef. All prefixes expect one xmm-width vpunpckhqdq.
define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
|
|
|
|
; ALL-LABEL: shuffle_v4i64_15uu:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
|
|
|
; ALL-NEXT: retq
|
|
|
|
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
2015-11-18 17:39:38 +08:00
|
|
|
; Mask <1,1,undef,undef>: %a[1] duplicated into lanes 0-1, upper lanes undef.
; %b is unused by the mask. All prefixes expect one xmm-width vpshufd.
define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
|
2015-12-23 21:10:07 +08:00
|
|
|
; ALL-LABEL: shuffle_v4i64_11uu:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
|
|
|
|
; ALL-NEXT: retq
|
2015-11-18 17:39:38 +08:00
|
|
|
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Mask <2,2,undef,undef>: %a[2] (from the upper 128-bit half) duplicated into
; lanes 0-1, upper lanes undef. %b is unused by the mask.
define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
|
|
|
|
; AVX1-LABEL: shuffle_v4i64_22uu:
|
|
|
|
; AVX1: # BB#0:
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
2016-01-17 06:30:20 +08:00
|
|
|
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
2015-11-18 17:39:38 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: shuffle_v4i64_22uu:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: shuffle_v4i64_22uu:
|
|
|
|
; AVX512VL: # BB#0:
|
|
|
|
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Mask <3,3,3,3>: splat of %a[3] to all four lanes. %b is unused by the mask.
; AVX1 expects two instructions; AVX2 and AVX512VL expect a single vpermq.
define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
|
|
|
|
; AVX1-LABEL: shuffle_v4i64_3333:
|
|
|
|
; AVX1: # BB#0:
|
2016-02-14 05:54:04 +08:00
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
|
|
|
|
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
2015-11-18 17:39:38 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: shuffle_v4i64_3333:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: shuffle_v4i64_3333:
|
|
|
|
; AVX512VL: # BB#0:
|
|
|
|
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
2017-01-14 02:23:47 +08:00
|
|
|
; Mask <1,4,3,4> over (%a, <0,undef,undef,undef>): lanes are %a[1], 0, %a[3], 0.
; %b is unused. AVX2 and AVX512VL expect a single per-lane byte shift (vpsrldq).
define <4 x i64> @shuffle_v4i64_1z3z(<4 x i64> %a, <4 x i64> %b) {
|
|
|
|
; AVX1-LABEL: shuffle_v4i64_1z3z:
|
|
|
|
; AVX1: # BB#0:
|
2017-07-28 01:47:01 +08:00
|
|
|
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
2017-01-14 02:23:47 +08:00
|
|
|
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: shuffle_v4i64_1z3z:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: shuffle_v4i64_1z3z:
|
|
|
|
; AVX512VL: # BB#0:
|
|
|
|
; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
%shuffle = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 undef, i64 undef, i64 undef>, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
[x86] Start fixing a really subtle and terrible form of miscompile in
these DAG combines.
The DAG auto-CSE thing is truly terrible. Due to it, when RAUW-ing
a node with its operand, you can cause its uses to CSE to itself, which
then causes their uses to become your uses which causes them to be
picked up by the RAUW. For nodes that are determined to be "no-ops",
this is "fine". But if the RAUW is one of several steps to enact
a transformation, this causes the DAG to really silently eat and discard
nodes that you would never expect. It took days for me to actually
pinpoint a test case triggering this and a really frustrating amount of
time to even comprehend the bug because I never even thought about the
ability of RAUW to iteratively consume nodes due to CSE-ing them into
itself.
To fix this, we have to build up a brand-new chain of operations any
time we are combining across (potentially) intervening nodes. But once
the logic is added to do this, another issue surfaces: CombineTo eagerly
deletes the one node combined, *but no others*. This is... really
frustrating. If deleting it makes its operands become dead, those
operand nodes often won't go onto the worklist in the
order you would want -- they're already on it and not near the top. That
means things higher on the worklist will get combined prior to these
dead nodes being GCed out of the worklist, and if the chain is long, the
immediate users won't be enough to re-detect where the root of the chain
is that became single-use again after deleting the dead nodes. The
better way to do this is to never immediately delete nodes, and instead
to just enqueue them so we can recursively delete them. The
combined-from node is typically not on the worklist anyways by virtue of
having been popped off.... But that in turn breaks other tests that
*require* CombineTo to delete unused nodes. :: sigh ::
Fortunately, there is a better way. This whole routine should have been
returning the replacement rather than using CombineTo which is quite
hacky. Switch to that, and all the pieces fall together.
I suspect the same kind of miscompile is possible in the half-shuffle
folding code, and potentially the recursive folding code. I'll be
switching those over to a pattern more like this one for safety's sake
even though I don't immediately have any test cases for them. Note that
the only way I got a test case for this instance was with *heavily* DAG
combined 256-bit shuffle sequences generated by my fuzzer. ;]
llvm-svn: 216319
2014-08-23 18:25:15 +08:00
|
|
|
; Chain of four dependent shuffles built only from %b (%a is unused). Only the
; function label and a `retq` are checked, so no particular instruction
; sequence is pinned here.
define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
|
2015-02-04 18:46:53 +08:00
|
|
|
; ALL-LABEL: stress_test1:
|
|
|
|
; ALL: retq
|
[x86] Start fixing a really subtle and terrible form of miscompile in
these DAG combines.
The DAG auto-CSE thing is truly terrible. Due to it, when RAUW-ing
a node with its operand, you can cause its uses to CSE to itself, which
then causes their uses to become your uses which causes them to be
picked up by the RAUW. For nodes that are determined to be "no-ops",
this is "fine". But if the RAUW is one of several steps to enact
a transformation, this causes the DAG to really silently eat an discard
nodes that you would never expect. It took days for me to actually
pinpoint a test case triggering this and a really frustrating amount of
time to even comprehend the bug because I never even thought about the
ability of RAUW to iteratively consume nodes due to CSE-ing them into
itself.
To fix this, we have to build up a brand-new chain of operations any
time we are combining across (potentially) intervening nodes. But once
the logic is added to do this, another issue surfaces: CombineTo eagerly
deletes the one node combined, *but no others*. This is... really
frustrating. If deleting it makes its operands become dead, those
operand nodes often won't go onto the worklist in the
order you would want -- they're already on it and not near the top. That
means things higher on the worklist will get combined prior to these
dead nodes being GCed out of the worklist, and if the chain is long, the
immediate users won't be enough to re-detect where the root of the chain
is that became single-use again after deleting the dead nodes. The
better way to do this is to never immediately delete nodes, and instead
to just enqueue them so we can recursively delete them. The
combined-from node is typically not on the worklist anyways by virtue of
having been popped off.... But that in turn breaks other tests that
*require* CombineTo to delete unused nodes. :: sigh ::
Fortunately, there is a better way. This whole routine should have been
returning the replacement rather than using CombineTo which is quite
hacky. Switch to that, and all the pieces fall together.
I suspect the same kind of miscompile is possible in the half-shuffle
folding code, and potentially the recursive folding code. I'll be
switching those over to a pattern more like this one for safety's sake
even though I don't immediately have any test cases for them. Note that
the only way I got a test case for this instance was with *heavily* DAG
combined 256-bit shuffle sequences generated by my fuzzer. ;]
llvm-svn: 216319
2014-08-23 18:25:15 +08:00
|
|
|
%c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
|
|
|
|
%d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
|
|
|
|
%e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
|
|
|
|
%f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>
|
|
|
|
|
|
|
|
ret <4 x i64> %f
|
|
|
|
}
|
2014-09-21 20:49:46 +08:00
|
|
|
|
|
|
|
; Inserts scalar argument %a into lane 0, then mask <0,5,6,7> against
; zeroinitializer keeps lane 0 and zeroes lanes 1-3. Expects a single vmovq.
define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
|
2015-06-24 08:03:48 +08:00
|
|
|
; ALL-LABEL: insert_reg_and_zero_v4i64:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vmovq %rdi, %xmm0
|
|
|
|
; ALL-NEXT: retq
|
2014-09-21 20:49:46 +08:00
|
|
|
%v = insertelement <4 x i64> undef, i64 %a, i64 0
|
|
|
|
%shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Loads an i64 from %ptr into lane 0, then mask <0,5,6,7> against
; zeroinitializer zeroes lanes 1-3. Expects a single zero-extending load.
define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
|
2016-02-02 21:32:56 +08:00
|
|
|
; ALL-LABEL: insert_mem_and_zero_v4i64:
|
|
|
|
; ALL: # BB#0:
|
2016-12-16 00:05:29 +08:00
|
|
|
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
2016-02-02 21:32:56 +08:00
|
|
|
; ALL-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%a = load i64, i64* %ptr
|
2014-09-21 20:49:46 +08:00
|
|
|
%v = insertelement <4 x i64> undef, i64 %a, i64 0
|
|
|
|
%shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Inserts scalar argument %a into lane 0, then mask <0,5,6,7> against
; zeroinitializer zeroes lanes 1-3. AVX1/AVX2 expect a zeroed register plus a
; ymm blend; AVX512VL expects a zeroed register plus an xmm vmovsd.
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
|
2015-11-18 05:19:45 +08:00
|
|
|
; AVX1-LABEL: insert_reg_and_zero_v4f64:
|
|
|
|
; AVX1: # BB#0:
|
2016-07-09 08:19:07 +08:00
|
|
|
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
|
2017-07-28 01:47:01 +08:00
|
|
|
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
2015-11-18 05:19:45 +08:00
|
|
|
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: insert_reg_and_zero_v4f64:
|
|
|
|
; AVX2: # BB#0:
|
2016-07-09 08:19:07 +08:00
|
|
|
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
|
2017-07-28 01:47:01 +08:00
|
|
|
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
2015-11-18 05:19:45 +08:00
|
|
|
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
|
|
|
; AVX2-NEXT: retq
|
2015-11-17 16:03:43 +08:00
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
|
|
|
|
; AVX512VL: # BB#0:
|
2017-01-14 15:29:24 +08:00
|
|
|
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
2016-02-02 21:32:56 +08:00
|
|
|
; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
2015-11-17 16:03:43 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
2014-09-21 20:49:46 +08:00
|
|
|
%v = insertelement <4 x double> undef, double %a, i32 0
|
|
|
|
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
|
|
|
ret <4 x double> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Loads a double from %ptr into lane 0, then mask <0,5,6,7> against
; zeroinitializer zeroes lanes 1-3. Expects a single zero-extending vmovsd load.
define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
|
2016-02-02 21:32:56 +08:00
|
|
|
; ALL-LABEL: insert_mem_and_zero_v4f64:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
|
|
|
; ALL-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%a = load double, double* %ptr
|
2014-09-21 20:49:46 +08:00
|
|
|
%v = insertelement <4 x double> undef, double %a, i32 0
|
|
|
|
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
|
|
|
ret <4 x double> %shuffle
|
|
|
|
}
|
2014-10-01 08:41:21 +08:00
|
|
|
|
|
|
|
; Loads a double from %ptr, inserts it into lane 0 and splats it with mask
; <0,0,0,0>. Expects the load to fold into a single vbroadcastsd from memory.
define <4 x double> @splat_mem_v4f64(double* %ptr) {
|
|
|
|
; ALL-LABEL: splat_mem_v4f64:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
|
|
|
|
; ALL-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%a = load double, double* %ptr
|
2014-10-01 08:41:21 +08:00
|
|
|
%v = insertelement <4 x double> undef, double %a, i32 0
|
|
|
|
%shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
|
|
ret <4 x double> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Loads an i64 from %ptr, inserts it into lane 0 and splats it with mask
; <0,0,0,0>. Expects the load to fold into a single vbroadcastsd from memory.
define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
|
2016-09-29 13:54:39 +08:00
|
|
|
; ALL-LABEL: splat_mem_v4i64:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
|
|
|
|
; ALL-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%a = load i64, i64* %ptr
|
2014-10-01 08:41:21 +08:00
|
|
|
%v = insertelement <4 x i64> undef, i64 %a, i64 0
|
|
|
|
%shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
2014-10-14 00:16:16 +08:00
|
|
|
|
|
|
|
; Same splat-from-memory as above, but built through a <2 x double>
; intermediate that the all-zero mask widens to four lanes. Expects a single
; vbroadcastsd from memory.
define <4 x double> @splat_mem_v4f64_2(double* %p) {
|
|
|
|
; ALL-LABEL: splat_mem_v4f64_2:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
|
|
|
|
; ALL-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%1 = load double, double* %p
|
2014-10-14 00:16:16 +08:00
|
|
|
%2 = insertelement <2 x double> undef, double %1, i32 0
|
|
|
|
%3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
|
|
|
|
ret <4 x double> %3
|
|
|
|
}
|
|
|
|
|
2015-01-31 22:09:36 +08:00
|
|
|
; Splats lane 0 of a <2 x double> register argument across four lanes via an
; all-zero mask. AVX1 expects vmovddup + vinsertf128; AVX2 and AVX512VL expect
; a single register vbroadcastsd.
define <4 x double> @splat_v4f64(<2 x double> %r) {
|
|
|
|
; AVX1-LABEL: splat_v4f64:
|
|
|
|
; AVX1: # BB#0:
|
|
|
|
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
2014-10-14 00:16:16 +08:00
|
|
|
; AVX2-LABEL: splat_v4f64:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
|
|
|
|
; AVX2-NEXT: retq
|
2015-11-17 16:03:43 +08:00
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: splat_v4f64:
|
|
|
|
; AVX512VL: # BB#0:
|
|
|
|
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
|
|
|
|
; AVX512VL-NEXT: retq
|
2014-10-14 00:16:16 +08:00
|
|
|
%1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
|
|
|
|
ret <4 x double> %1
|
|
|
|
}
|
2015-03-06 01:14:04 +08:00
|
|
|
|
[X86] Don't generate vbroadcasti128 for v4i64 splats from memory.
We used to erroneously match:
(v4i64 shuffle (v2i64 load), <0,0,0,0>)
Whereas vbroadcasti128 is more like:
(v4i64 shuffle (v2i64 load), <0,1,0,1>)
This problem doesn't exist for vbroadcastf128, which kept matching
the intrinsic after r231182. We should perhaps re-introduce the
intrinsic here as well, but that's a separate issue still being
discussed.
While there, add some proper vbroadcastf128 tests. We don't currently
match those, like for loading vbroadcastsd/ss on AVX (the reg-reg
broadcasts where added in AVX2).
Fixes PR23886.
llvm-svn: 240488
2015-06-24 08:07:16 +08:00
|
|
|
; Loads a full <2 x i64> but splats only element 0 (mask <0,0,0,0>). Expects a
; 64-bit vbroadcastsd from memory, not a 128-bit broadcast.
define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
|
2016-09-29 13:54:39 +08:00
|
|
|
; ALL-LABEL: splat_mem_v4i64_from_v2i64:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
|
|
|
|
; ALL-NEXT: retq
|
[X86] Don't generate vbroadcasti128 for v4i64 splats from memory.
We used to erroneously match:
(v4i64 shuffle (v2i64 load), <0,0,0,0>)
Whereas vbroadcasti128 is more like:
(v4i64 shuffle (v2i64 load), <0,1,0,1>)
This problem doesn't exist for vbroadcastf128, which kept matching
the intrinsic after r231182. We should perhaps re-introduce the
intrinsic here as well, but that's a separate issue still being
discussed.
While there, add some proper vbroadcastf128 tests. We don't currently
match those, like for loading vbroadcastsd/ss on AVX (the reg-reg
broadcasts where added in AVX2).
Fixes PR23886.
llvm-svn: 240488
2015-06-24 08:07:16 +08:00
|
|
|
%v = load <2 x i64>, <2 x i64>* %ptr
|
|
|
|
%shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Loads a full <2 x double> but splats only element 0 (mask <0,0,0,0>).
; Expects a 64-bit vbroadcastsd from memory.
define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
|
2015-12-09 06:17:11 +08:00
|
|
|
; ALL-LABEL: splat_mem_v4f64_from_v2f64:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
|
|
|
|
; ALL-NEXT: retq
|
[X86] Don't generate vbroadcasti128 for v4i64 splats from memory.
We used to erroneously match:
(v4i64 shuffle (v2i64 load), <0,0,0,0>)
Whereas vbroadcasti128 is more like:
(v4i64 shuffle (v2i64 load), <0,1,0,1>)
This problem doesn't exist for vbroadcastf128, which kept matching
the intrinsic after r231182. We should perhaps re-introduce the
intrinsic here as well, but that's a separate issue still being
discussed.
While there, add some proper vbroadcastf128 tests. We don't currently
match those, like for loading vbroadcastsd/ss on AVX (the reg-reg
broadcasts where added in AVX2).
Fixes PR23886.
llvm-svn: 240488
2015-06-24 08:07:16 +08:00
|
|
|
%v = load <2 x double>, <2 x double>* %ptr
|
|
|
|
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
|
|
|
ret <4 x double> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Mask <0,1,0,1> repeats the whole loaded <2 x i64> in both 128-bit halves.
; AVX1 and AVX2 expect vbroadcastf128; AVX512VL expects vbroadcasti128.
define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
|
2015-11-18 05:19:45 +08:00
|
|
|
; AVX1-LABEL: splat128_mem_v4i64_from_v2i64:
|
|
|
|
; AVX1: # BB#0:
|
2016-07-22 21:58:44 +08:00
|
|
|
; AVX1-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
|
2015-11-18 05:19:45 +08:00
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: splat128_mem_v4i64_from_v2i64:
|
|
|
|
; AVX2: # BB#0:
|
2016-09-29 13:54:28 +08:00
|
|
|
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
|
2015-11-18 05:19:45 +08:00
|
|
|
; AVX2-NEXT: retq
|
2015-11-17 16:03:43 +08:00
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
|
|
|
|
; AVX512VL: # BB#0:
|
2017-03-07 16:05:53 +08:00
|
|
|
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
|
2015-11-17 16:03:43 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
[X86] Don't generate vbroadcasti128 for v4i64 splats from memory.
We used to erroneously match:
(v4i64 shuffle (v2i64 load), <0,0,0,0>)
Whereas vbroadcasti128 is more like:
(v4i64 shuffle (v2i64 load), <0,1,0,1>)
This problem doesn't exist for vbroadcastf128, which kept matching
the intrinsic after r231182. We should perhaps re-introduce the
intrinsic here as well, but that's a separate issue still being
discussed.
While there, add some proper vbroadcastf128 tests. We don't currently
match those, like for loading vbroadcastsd/ss on AVX (the reg-reg
broadcasts where added in AVX2).
Fixes PR23886.
llvm-svn: 240488
2015-06-24 08:07:16 +08:00
|
|
|
%v = load <2 x i64>, <2 x i64>* %ptr
|
|
|
|
%shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Mask <0,1,0,1> repeats the whole loaded <2 x double> in both 128-bit halves.
; All prefixes expect a single vbroadcastf128 from memory.
define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
|
2017-03-07 16:05:53 +08:00
|
|
|
; ALL-LABEL: splat128_mem_v4f64_from_v2f64:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
|
|
|
|
; ALL-NEXT: retq
|
[X86] Don't generate vbroadcasti128 for v4i64 splats from memory.
We used to erroneously match:
(v4i64 shuffle (v2i64 load), <0,0,0,0>)
Whereas vbroadcasti128 is more like:
(v4i64 shuffle (v2i64 load), <0,1,0,1>)
This problem doesn't exist for vbroadcastf128, which kept matching
the intrinsic after r231182. We should perhaps re-introduce the
intrinsic here as well, but that's a separate issue still being
discussed.
While there, add some proper vbroadcastf128 tests. We don't currently
match those, like for loading vbroadcastsd/ss on AVX (the reg-reg
broadcasts where added in AVX2).
Fixes PR23886.
llvm-svn: 240488
2015-06-24 08:07:16 +08:00
|
|
|
%v = load <2 x double>, <2 x double>* %ptr
|
|
|
|
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
|
|
|
|
ret <4 x double> %shuffle
|
|
|
|
}
|
|
|
|
|
2016-06-28 00:15:37 +08:00
|
|
|
; Widens a <2 x i64> argument to <4 x i64> (mask <0,1,2,3>; indices 2-3 select
; from the undef operand), bitcasts to <4 x double>, then splats lane 0.
; AVX2 and AVX512VL expect a single vbroadcastsd.
define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
|
|
|
|
; AVX1-LABEL: broadcast_v4f64_0000_from_v2i64:
|
|
|
|
; AVX1: # BB#0:
|
2016-06-28 16:08:15 +08:00
|
|
|
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
2016-06-28 00:15:37 +08:00
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: broadcast_v4f64_0000_from_v2i64:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: broadcast_v4f64_0000_from_v2i64:
|
|
|
|
; AVX512VL: # BB#0:
|
|
|
|
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
%1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
|
|
%2 = bitcast <4 x i64> %1 to <4 x double>
|
|
|
|
%3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer
|
|
|
|
ret <4 x double> %3
|
|
|
|
}
|
|
|
|
|
2015-03-06 01:14:04 +08:00
|
|
|
; A 64-bit shuffle (<4,0,6,2>) followed, through bitcasts, by a 32-bit and a
; 16-bit element shuffle. The CHECK lines expect the whole chain to collapse to
; one unpack-low of ymm0/ymm1 (ymm0[0],ymm1[0],ymm0[2],ymm1[2]).
define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
|
|
|
|
; AVX1-LABEL: bitcast_v4f64_0426:
|
|
|
|
; AVX1: # BB#0:
|
|
|
|
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: bitcast_v4f64_0426:
|
|
|
|
; AVX2: # BB#0:
|
2015-06-24 08:03:48 +08:00
|
|
|
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
|
2015-03-06 01:14:04 +08:00
|
|
|
; AVX2-NEXT: retq
|
2015-11-17 16:03:43 +08:00
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: bitcast_v4f64_0426:
|
|
|
|
; AVX512VL: # BB#0:
|
2015-11-18 06:35:45 +08:00
|
|
|
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
|
2015-11-17 16:03:43 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
2015-03-06 01:14:04 +08:00
|
|
|
%shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
|
|
|
|
%bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
|
|
|
|
%shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
|
|
|
|
%bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
|
|
|
|
%shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
|
|
|
|
%bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
|
|
|
|
ret <4 x double> %bitcast64
|
|
|
|
}
|
2015-08-19 04:51:15 +08:00
|
|
|
|
|
|
|
; Extracts lanes 0-1 (low half of %a0) and lanes 6-7 (high half of %a1) as two
; <2 x i64> values and concatenates them. Each prefix expects a single blend.
define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
|
2015-08-20 04:09:50 +08:00
|
|
|
; AVX1-LABEL: concat_v4i64_0167:
|
|
|
|
; AVX1: # BB#0:
|
|
|
|
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: concat_v4i64_0167:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
|
|
|
; AVX2-NEXT: retq
|
2015-11-17 16:03:43 +08:00
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: concat_v4i64_0167:
|
|
|
|
; AVX512VL: # BB#0:
|
|
|
|
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
|
|
|
; AVX512VL-NEXT: retq
|
2015-08-19 04:51:15 +08:00
|
|
|
%a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
|
|
|
|
%a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
|
|
|
|
%shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
|
|
ret <4 x i64> %shuffle64
|
|
|
|
}
|
|
|
|
|
|
|
|
; Extracts the low halves of %a0 and %a1 (lanes 0-1 and 4-5), bitcasts each to
; <4 x i32>, concatenates them as <8 x i32> and bitcasts back to <4 x i64>.
; All prefixes expect a single vinsertf128.
define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
|
2017-08-01 06:07:29 +08:00
|
|
|
; ALL-LABEL: concat_v4i64_0145_bc:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; ALL-NEXT: retq
|
2015-08-19 04:51:15 +08:00
|
|
|
%a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
|
|
|
|
%a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
|
|
|
|
%bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
|
|
|
|
%bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
|
|
|
|
%shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
|
|
%shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
|
|
|
|
ret <4 x i64> %shuffle64
|
|
|
|
}
|
2015-08-21 04:59:41 +08:00
|
|
|
|
|
|
|
; Loads an i64 with `align 1`, inserts it into lane 0 of a <2 x i64>, and
; splats it to four lanes with an all-zero mask. Expects a single vbroadcastsd
; from memory.
define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
|
2016-09-29 13:54:39 +08:00
|
|
|
; ALL-LABEL: insert_dup_mem_v4i64:
|
|
|
|
; ALL: # BB#0:
|
|
|
|
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
|
|
|
|
; ALL-NEXT: retq
|
2015-08-21 04:59:41 +08:00
|
|
|
%tmp = load i64, i64* %ptr, align 1
|
|
|
|
%tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
|
|
|
|
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer
|
|
|
|
ret <4 x i64> %tmp2
|
|
|
|
}
|
2016-11-10 11:39:19 +08:00
|
|
|
|
|
|
|
; Mask <1,2,3,4> over (%a, %b): lanes are %a[1], %a[2], %a[3], %b[0].
; AVX512VL expects a single valignq; AVX1 and AVX2 expect a blend followed by
; permute instructions.
define <4 x i64> @shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b) {
|
|
|
|
; AVX1-LABEL: shuffle_v4i64_1234:
|
|
|
|
; AVX1: # BB#0:
|
|
|
|
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
|
|
|
|
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
|
|
|
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: shuffle_v4i64_1234:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: shuffle_v4i64_1234:
|
|
|
|
; AVX512VL: # BB#0:
|
2016-11-12 13:05:27 +08:00
|
|
|
; AVX512VL-NEXT: valignq {{.*#+}} ymm0 = ymm0[1,2,3],ymm1[0]
|
2016-11-10 11:39:19 +08:00
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
|
|
|
|
|
|
|
; Mask <1,2,3,0> on a single input: rotates the four lanes of %a by one
; element. AVX2 and AVX512VL expect a single vpermq; AVX1 expects two
; instructions.
define <4 x i64> @shuffle_v4i64_1230(<4 x i64> %a) {
|
|
|
|
; AVX1-LABEL: shuffle_v4i64_1230:
|
|
|
|
; AVX1: # BB#0:
|
|
|
|
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
|
|
|
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: shuffle_v4i64_1230:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: shuffle_v4i64_1230:
|
|
|
|
; AVX512VL: # BB#0:
|
|
|
|
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
%shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
|
|
|
|
ret <4 x i64> %shuffle
|
|
|
|
}
|
2017-03-21 21:30:40 +08:00
|
|
|
|
|
|
|
; Mask <4,0,4,3> over (%a, <0,undef,undef,undef>): lanes are 0, %a[0], 0, %a[3].
; %b is unused. Each prefix expects a permute, a zeroed register and a blend.
define <4 x i64> @shuffle_v4i64_z0z3(<4 x i64> %a, <4 x i64> %b) {
|
|
|
|
; AVX1-LABEL: shuffle_v4i64_z0z3:
|
|
|
|
; AVX1: # BB#0:
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
|
2017-07-28 01:47:01 +08:00
|
|
|
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
2017-03-21 21:30:40 +08:00
|
|
|
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: shuffle_v4i64_z0z3:
|
|
|
|
; AVX2: # BB#0:
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
|
2017-07-28 01:47:01 +08:00
|
|
|
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
2017-03-21 21:30:40 +08:00
|
|
|
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: shuffle_v4i64_z0z3:
|
|
|
|
; AVX512VL: # BB#0:
|
|
|
|
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
|
2017-08-03 16:50:18 +08:00
|
|
|
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
2017-03-21 21:30:40 +08:00
|
|
|
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
%1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 undef, i64 undef, i64 undef>, <4 x i32> <i32 4, i32 0, i32 4, i32 3>
|
|
|
|
ret <4 x i64> %1
|
|
|
|
}
|
|
|
|
|
|
|
|
; Mask <1,4,2,4> over (%a, <0,undef,undef,undef>): lanes are %a[1], 0, %a[2], 0.
; %b is unused. AVX2 and AVX512VL expect a zeroed register, a blend and a
; vpermq; AVX1 expects a longer extract/insert sequence.
define <4 x i64> @shuffle_v4i64_1z2z(<4 x i64> %a, <4 x i64> %b) {
|
|
|
|
; AVX1-LABEL: shuffle_v4i64_1z2z:
|
|
|
|
; AVX1: # BB#0:
|
2017-07-28 01:47:01 +08:00
|
|
|
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
2017-03-21 21:30:40 +08:00
|
|
|
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
|
|
|
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
|
|
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
|
|
|
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
|
|
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX2-LABEL: shuffle_v4i64_1z2z:
|
|
|
|
; AVX2: # BB#0:
|
2017-07-28 01:47:01 +08:00
|
|
|
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
2017-03-21 21:30:40 +08:00
|
|
|
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
|
|
|
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0]
|
|
|
|
; AVX2-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512VL-LABEL: shuffle_v4i64_1z2z:
|
|
|
|
; AVX512VL: # BB#0:
|
2017-08-03 16:50:18 +08:00
|
|
|
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
2017-03-21 21:30:40 +08:00
|
|
|
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
|
|
|
|
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,0]
|
|
|
|
; AVX512VL-NEXT: retq
|
|
|
|
%1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 undef, i64 undef, i64 undef>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
|
|
|
|
ret <4 x i64> %1
|
|
|
|
}
|