forked from OSchip/llvm-project
[X86] Add some broadcast-from-memory tests.
llvm-svn: 245612
This commit is contained in:
parent
ca3ef11a9b
commit
69a17acb74
|
@ -1370,3 +1370,95 @@ define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
|
|||
%bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8>
|
||||
ret <16 x i8> %bitcast8
|
||||
}
|
||||
|
||||
; Broadcast-from-memory test: loads an i32 from %ptr, inserts it into lane 0 of
; a zeroed <4 x i32>, reinterprets that vector as <16 x i8>, and splats element
; 0 (all-zero shuffle mask) across all 16 byte lanes.
; The check lines record the expected code per prefix: a (v)movd load followed
; by punpcklbw/pshufd/pshuflw/pshufhw for the SSE2 prefix, by pshufb with a
; zeroed mask register for the SSSE3, SSE41 and AVX1 prefixes, and by
; vpbroadcastb for the AVX2 prefix.
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
|
||||
; SSE2-LABEL: insert_dup_mem_v16i8_i32:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_dup_mem_v16i8_i32:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_dup_mem_v16i8_i32:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: insert_dup_mem_v16i8_i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v16i8_i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||
%tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
|
||||
%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i8> %tmp3
|
||||
}
|
||||
|
||||
; Broadcast-from-memory test with a sign-extended scalar: loads an i8 from
; %ptr, sign-extends it to i32, inserts that into lane 0 of a zeroed
; <4 x i32>, reinterprets the vector as <16 x i8>, and splats element 0
; (all-zero shuffle mask) across all 16 byte lanes.
; Every prefix expects the extension to be done by movsbl into %eax and then
; moved into xmm0 with (v)movd before the splat sequence (shuffle chain,
; pshufb with a zeroed mask register, or vpbroadcastb).
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
|
||||
; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsbl (%rdi), %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsbl (%rdi), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movsbl (%rdi), %eax
|
||||
; SSE41-NEXT: movd %eax, %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: movsbl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movsbl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i8, i8* %ptr, align 1
|
||||
%tmp1 = sext i8 %tmp to i32
|
||||
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
|
||||
%tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i8> %tmp4
|
||||
}
|
||||
|
|
|
@ -1133,6 +1133,29 @@ define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
|
|||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
||||
; Broadcast-from-memory test: loads an i64 from %ptr (align 1), inserts it
; into lane 0 of an undef <2 x i64>, and splats element 0 (all-zero shuffle
; mask) into both lanes.
; Expected code: (v)movq load followed by (v)pshufd [0,1,0,1] for the SSE and
; AVX1 prefixes; a single vpbroadcastq straight from (%rdi) for the AVX2
; prefix.
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
|
||||
; SSE-LABEL: insert_dup_mem_v2i64:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: insert_dup_mem_v2i64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v2i64:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i64, i64* %ptr, align 1
|
||||
%tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
|
||||
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i64> %tmp2
|
||||
}
|
||||
|
||||
define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
|
||||
; SSE-LABEL: shuffle_mem_v2f64_10:
|
||||
; SSE: # BB#0:
|
||||
|
|
|
@ -1875,6 +1875,23 @@ define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
|
|||
ret <4 x float> %shuffle
|
||||
}
|
||||
|
||||
; Broadcast-from-memory test: loads an i32 from %ptr, inserts it into lane 0
; of a zeroed <4 x i32>, and splats element 0 (all-zero shuffle mask) into all
; four lanes.
; Expected code: movd load followed by pshufd [0,0,0,0] for the SSE prefix; a
; single vbroadcastss straight from (%rdi) for the AVX prefix.
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <4 x i32> @insert_dup_mem_v4i32(i32* %ptr) {
|
||||
; SSE-LABEL: insert_dup_mem_v4i32:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_dup_mem_v4i32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vbroadcastss (%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
;
|
||||
; Shuffle to logical bit shifts
|
||||
;
|
||||
|
|
|
@ -2145,3 +2145,87 @@ define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
|
|||
|
||||
ret <8 x i16> %shuffle
|
||||
}
|
||||
|
||||
; Broadcast-from-memory test: loads an i32 from %ptr, inserts it into lane 0
; of a zeroed <4 x i32>, reinterprets that vector as <8 x i16>, and splats
; element 0 (all-zero shuffle mask) across all eight 16-bit lanes.
; Expected code after the (v)movd load: pshufd/pshuflw/pshufhw for the SSE2
; prefix, a single (v)pshufb repeating bytes 0,1 for the SSSE3, SSE41 and AVX1
; prefixes, and vpbroadcastw for the AVX2 prefix.
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
|
||||
; SSE2-LABEL: insert_dup_mem_v8i16_i32:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_dup_mem_v8i16_i32:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_dup_mem_v8i16_i32:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: insert_dup_mem_v8i16_i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v8i16_i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||
%tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
|
||||
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i16> %tmp3
|
||||
}
|
||||
|
||||
; Broadcast-from-memory test with a sign-extended scalar: loads an i16 from
; %ptr, sign-extends it to i32, inserts that into lane 0 of a zeroed
; <4 x i32>, reinterprets the vector as <8 x i16>, and splats element 0
; (all-zero shuffle mask) across all eight 16-bit lanes.
; Every prefix expects the extension to be done by movswl into %eax and then
; moved into xmm0 with (v)movd before the splat sequence (shuffle chain,
; pshufb repeating bytes 0,1, or vpbroadcastw).
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
|
||||
; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movswl (%rdi), %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movswl (%rdi), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movswl (%rdi), %eax
|
||||
; SSE41-NEXT: movd %eax, %xmm0
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: movswl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movswl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i16, i16* %ptr, align 2
|
||||
%tmp1 = sext i16 %tmp to i32
|
||||
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
|
||||
%tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i16> %tmp4
|
||||
}
|
||||
|
|
|
@ -3290,3 +3290,46 @@ define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_b
|
|||
%shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
|
||||
ret <16 x i16> %shuffle16
|
||||
}
|
||||
|
||||
; 256-bit broadcast-from-memory test: loads an i32 from %ptr, inserts it into
; lane 0 of a zeroed <4 x i32>, reinterprets that vector as <8 x i16>, and
; uses a 16-entry all-zero shuffle mask so the <8 x i16> source is widened to
; a <16 x i16> result with element 0 in every lane.
; Expected code after the vmovd load: vpshufb repeating bytes 0,1 plus
; vinsertf128 $1 to copy the low half into the high half for the AVX1 prefix;
; vpbroadcastw from xmm0 into ymm0 for the AVX2 prefix.
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
|
||||
; AVX1-LABEL: insert_dup_mem_v16i16_i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v16i16_i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||
%tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
|
||||
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i16> %tmp3
|
||||
}
|
||||
|
||||
; 256-bit broadcast-from-memory test with a sign-extended scalar: loads an i16
; from %ptr, sign-extends it to i32, inserts that into lane 0 of a zeroed
; <4 x i32>, reinterprets the vector as <8 x i16>, and uses a 16-entry
; all-zero shuffle mask to produce a <16 x i16> with element 0 in every lane.
; Both prefixes expect movswl into %eax followed by vmovd into xmm0; the AVX1
; prefix then expects vpshufb plus vinsertf128 $1, the AVX2 prefix expects
; vpbroadcastw from xmm0 into ymm0.
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
|
||||
; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: movswl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movswl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i16, i16* %ptr, align 2
|
||||
%tmp1 = sext i16 %tmp to i32
|
||||
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
|
||||
%tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i16> %tmp4
|
||||
}
|
||||
|
|
|
@ -1974,3 +1974,48 @@ define <32 x i8> @shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_
|
|||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
|
||||
ret <32 x i8> %shuffle
|
||||
}
|
||||
|
||||
; 256-bit broadcast-from-memory test: loads an i32 from %ptr, inserts it into
; lane 0 of a zeroed <4 x i32>, reinterprets that vector as <16 x i8>, and
; uses a 32-entry all-zero shuffle mask so the <16 x i8> source is widened to
; a <32 x i8> result with element 0 in every lane.
; Expected code after the vmovd load: vpshufb with a zeroed mask register plus
; vinsertf128 $1 to copy the low half into the high half for the AVX1 prefix;
; vpbroadcastb from xmm0 into ymm0 for the AVX2 prefix.
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <32 x i8> @insert_dup_mem_v32i8_i32(i32* %ptr) {
|
||||
; AVX1-LABEL: insert_dup_mem_v32i8_i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v32i8_i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||
%tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
|
||||
%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <32 x i32> zeroinitializer
|
||||
ret <32 x i8> %tmp3
|
||||
}
|
||||
|
||||
; 256-bit broadcast-from-memory test with a sign-extended scalar: loads an i8
; from %ptr, sign-extends it to i32, inserts that into lane 0 of a zeroed
; <4 x i32>, reinterprets the vector as <16 x i8>, and uses a 32-entry
; all-zero shuffle mask to produce a <32 x i8> with element 0 in every lane.
; Both prefixes expect movsbl into %eax followed by vmovd into xmm0; the AVX1
; prefix then expects vpshufb with a zeroed mask register plus vinsertf128 $1,
; the AVX2 prefix expects vpbroadcastb from xmm0 into ymm0.
; NOTE(review): the check prefixes are presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <32 x i8> @insert_dup_mem_v32i8_sext_i8(i8* %ptr) {
|
||||
; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: movsbl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v32i8_sext_i8:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movsbl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i8, i8* %ptr, align 1
|
||||
%tmp1 = sext i8 %tmp to i32
|
||||
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
|
||||
%tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <32 x i32> zeroinitializer
|
||||
ret <32 x i8> %tmp4
|
||||
}
|
||||
|
|
|
@ -1002,3 +1002,14 @@ define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
|
|||
%shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
|
||||
ret <4 x i64> %shuffle64
|
||||
}
|
||||
|
||||
; 256-bit broadcast-from-memory test: loads an i64 from %ptr (align 1),
; inserts it into lane 0 of an undef <2 x i64>, and uses a 4-entry all-zero
; shuffle mask to produce a <4 x i64> with element 0 in every lane.
; The single shared check prefix expects one vbroadcastsd straight from
; (%rdi) into ymm0.
; NOTE(review): the check prefix is presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
|
||||
; ALL-LABEL: insert_dup_mem_v4i64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%tmp = load i64, i64* %ptr, align 1
|
||||
%tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
|
||||
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i64> %tmp2
|
||||
}
|
||||
|
|
|
@ -2127,3 +2127,14 @@ define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
|
|||
%shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
|
||||
ret <8 x i32> %shuffle32
|
||||
}
|
||||
|
||||
; 256-bit broadcast-from-memory test: loads an i32 from %ptr, inserts it into
; lane 0 of a zeroed <4 x i32>, and uses an 8-entry all-zero shuffle mask to
; produce an <8 x i32> with element 0 in every lane.
; The single shared check prefix expects one vbroadcastss straight from
; (%rdi) into ymm0.
; NOTE(review): the check prefix is presumably bound to RUN lines that are
; outside this view - confirm against the top of the test file.
define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
|
||||
; ALL-LABEL: insert_dup_mem_v8i32:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vbroadcastss (%rdi), %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i32> %tmp2
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue