forked from OSchip/llvm-project
[X86] Add test cases for failures to form vpbroadcastw due to isTypeDesirableForOp preventing load shrinking to i16.
These are based on existing test cases but use i64 instead of i32. Some of them end up with i64 zextloads/extloads from i16, for which we don't have isel patterns. Others fail because isTypeDesirableForOp prevents shrinking the (trunc (i64 (srl (load)))) directly, so we instead try to shrink based on the (i64 (srl (load))); that shrink only succeeds when 64 - shift_amount is a power of 2.
This commit is contained in:
parent
a198adb490
commit
51a4c6125c
|
@ -3226,3 +3226,253 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
|
|||
%tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
ret <8 x i16> %tmp4
|
||||
}
|
||||
|
||||
; Splat of i16 element 0 of an i64 load that was placed in lane 0 of a zeroed
; <2 x i64>. The assertions below show the load being folded straight into a
; memory-operand vpbroadcastw on the AVX2-capable runs.
define <8 x i16> @insert_dup_mem_v8i16_i64(i64* %ptr) {
; SSE-LABEL: insert_dup_mem_v8i16_i64:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v8i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i64:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
; AVX2OR512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_mem_v8i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_mem_v8i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastw (%rdi), %xmm0
; XOPAVX2-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
|
||||
|
||||
; Splat of i16 element 1 (bits 16..31 of the loaded i64, lane 0 of a zeroed
; <2 x i64>). On the AVX2-capable runs the assertions show a full 64-bit load
; followed by shrq $16 instead of a 16-bit load; presumably the load is not
; shrunk because 64 - 16 is not a power of two (TODO confirm).
define <8 x i16> @insert_dup_elt1_mem_v8i16_i64(i64* %ptr) {
; SSE-LABEL: insert_dup_elt1_mem_v8i16_i64:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movq (%rdi), %rax
; AVX2-NEXT: shrq $16, %rax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq (%rdi), %rax
; AVX512VL-NEXT: shrq $16, %rax
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_elt1_mem_v8i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_elt1_mem_v8i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movq (%rdi), %rax
; XOPAVX2-NEXT: shrq $16, %rax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; XOPAVX2-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %splat
}
|
||||
|
||||
; Splat of i16 element 3 (the top 16 bits of the loaded i64, lane 0 of a zeroed
; <2 x i64>). The AVX2-capable runs narrow the load to movzwl 6(%rdi), but the
; value still travels through a GPR before the broadcast rather than being
; folded into a memory-operand vpbroadcastw.
define <8 x i16> @insert_dup_elt3_mem_v8i16_i64(i64* %ptr) {
; SSE-LABEL: insert_dup_elt3_mem_v8i16_i64:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movzwl 6(%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movzwl 6(%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_elt3_mem_v8i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_elt3_mem_v8i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movzwl 6(%rdi), %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; XOPAVX2-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %splat
}
|
||||
|
||||
; Splat of i16 element 7. Here the i64 load goes into lane 1 of the zeroed
; <2 x i64>, so element 7 is the top 16 bits of the loaded value; the
; AVX2-capable runs accordingly read it with movzwl 6(%rdi) and then broadcast
; from a register.
define <8 x i16> @insert_dup_elt7_mem_v8i16_i64(i64* %ptr) {
; SSE2-LABEL: insert_dup_elt7_mem_v8i16_i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt7_mem_v8i16_i64:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt7_mem_v8i16_i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt7_mem_v8i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt7_mem_v8i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movzwl 6(%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_elt7_mem_v8i16_i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movzwl 6(%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_elt7_mem_v8i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_elt7_mem_v8i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movzwl 6(%rdi), %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; XOPAVX2-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x i16> %splat
}
|
||||
|
||||
; Splat of i16 element 0 of an i16 load that is sign-extended to i64 and put in
; lane 0 of a zeroed <2 x i64>; only the low 16 bits of the extended value are
; used. The assertions show movzwl plus a register broadcast on the
; AVX2-capable runs instead of a memory-operand vpbroadcastw.
define <8 x i16> @insert_dup_mem_v8i16_sext_i16_i64(i16* %ptr) {
; SSE-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
; SSE: # %bb.0:
; SSE-NEXT: movzwl (%rdi), %eax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movzwl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movzwl (%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: movzwl (%rdi), %eax
; XOPAVX1-NEXT: vmovq %rax, %xmm0
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_mem_v8i16_sext_i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movzwl (%rdi), %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; XOPAVX2-NEXT: retq
  %narrow = load i16, i16* %ptr, align 2
  %ext = sext i16 %narrow to i64
  %vec = insertelement <2 x i64> zeroinitializer, i64 %ext, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
|
||||
|
|
|
@ -7458,6 +7458,215 @@ define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
|
|||
ret <16 x i16> %tmp3
|
||||
}
|
||||
|
||||
; 256-bit variant. Element 0 of an <8 x i16> view of an i64 load (lane 0 of a
; zeroed <2 x i64>) is splatted across a <16 x i16> result. The AVX2-capable
; runs fold the load into a memory-operand vpbroadcastw.
define <16 x i16> @insert_dup_mem_v16i16_i64(i64* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_i64:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %ymm0
; AVX2OR512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_mem_v16i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_mem_v16i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastw (%rdi), %ymm0
; XOPAVX2-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %splat
}
|
||||
|
||||
; 256-bit variant. Splats i16 element 1 (bits 16..31 of the loaded i64) across
; a <16 x i16> result. The AVX2-capable runs keep the full 64-bit load and a
; shrq $16; presumably the load is not shrunk because 64 - 16 is not a power
; of two (TODO confirm).
define <16 x i16> @insert_dup_elt1_mem_v16i16_i64(i64* %ptr) {
; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movq (%rdi), %rax
; AVX2-NEXT: shrq $16, %rax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_elt1_mem_v16i16_i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq (%rdi), %rax
; AVX512VL-NEXT: shrq $16, %rax
; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_elt1_mem_v16i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_elt1_mem_v16i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movq (%rdi), %rax
; XOPAVX2-NEXT: shrq $16, %rax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; XOPAVX2-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %splat
}
|
||||
|
||||
; 256-bit variant. Splats i16 element 3 (the top 16 bits of the loaded i64)
; across a <16 x i16> result. The AVX2-capable runs narrow the load to
; movzwl 6(%rdi) but still broadcast from a register rather than from memory.
define <16 x i16> @insert_dup_elt3_mem_v16i16_i64(i64* %ptr) {
; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movzwl 6(%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_elt3_mem_v16i16_i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movzwl 6(%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_elt3_mem_v16i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_elt3_mem_v16i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movzwl 6(%rdi), %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; XOPAVX2-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i16> %splat
}
|
||||
|
||||
; 256-bit variant. The i64 load goes into lane 1 of the zeroed <2 x i64>, so
; i16 element 7 is the top 16 bits of the loaded value; it is splatted across
; a <16 x i16> result. The AVX2-capable runs read it with movzwl 6(%rdi) and
; broadcast from a register.
define <16 x i16> @insert_dup_elt7_mem_v16i16_i64(i64* %ptr) {
; AVX1-LABEL: insert_dup_elt7_mem_v16i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt7_mem_v16i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movzwl 6(%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_elt7_mem_v16i16_i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movzwl 6(%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_elt7_mem_v16i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_elt7_mem_v16i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movzwl 6(%rdi), %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; XOPAVX2-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <16 x i16> %splat
}
|
||||
|
||||
; 256-bit variant. An i16 load is sign-extended to i64, placed in lane 0 of a
; zeroed <2 x i64>, and its low i16 element is splatted across a <16 x i16>
; result. The AVX2-capable runs use movzwl plus a register broadcast instead
; of a memory-operand vpbroadcastw.
define <16 x i16> @insert_dup_mem_v16i16_sext_i16_i64(i16* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
; AVX1: # %bb.0:
; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: movzwl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movzwl (%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT: retq
;
; XOPAVX1-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: movzwl (%rdi), %eax
; XOPAVX1-NEXT: vmovq %rax, %xmm0
; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: movzwl (%rdi), %eax
; XOPAVX2-NEXT: vmovd %eax, %xmm0
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; XOPAVX2-NEXT: retq
  %narrow = load i16, i16* %ptr, align 2
  %ext = sext i16 %narrow to i64
  %vec = insertelement <2 x i64> zeroinitializer, i64 %ext, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %splat
}
|
||||
|
||||
define <16 x i16> @unpckh_v16i16(<16 x i16> %x, <16 x i16> %y) {
|
||||
; AVX1-LABEL: unpckh_v16i16:
|
||||
; AVX1: # %bb.0:
|
||||
|
|
|
@ -289,6 +289,111 @@ define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(i32* %ptr) #0 {
|
|||
ret <32 x i16> %tmp3
|
||||
}
|
||||
|
||||
; 512-bit variant. Element 0 of an <8 x i16> view of an i64 load (lane 0 of a
; zeroed <2 x i64>) is splatted across a <32 x i16> result. Both runs fold the
; load into a memory-operand vpbroadcastw.
; NOTE(review) the name says v16i16 although the result type is <32 x i16>;
; renaming would also require regenerating the label assertions.
define <32 x i16> @insert_dup_mem_v16i16_i64(i64* %ptr) {
; KNL-LABEL: insert_dup_mem_v16i16_i64:
; KNL: ## %bb.0:
; KNL-NEXT: vpbroadcastw (%rdi), %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_mem_v16i16_i64:
; SKX: ## %bb.0:
; SKX-NEXT: vpbroadcastw (%rdi), %zmm0
; SKX-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <32 x i32> zeroinitializer
  ret <32 x i16> %splat
}
|
||||
|
||||
; 512-bit variant. Splats i16 element 1 (bits 16..31 of the loaded i64) across
; a <32 x i16> result. Both runs keep the full 64-bit load and a shrq $16;
; presumably the load is not shrunk because 64 - 16 is not a power of two
; (TODO confirm).
; NOTE(review) the name says v16i16 although the result type is <32 x i16>;
; renaming would also require regenerating the label assertions.
define <32 x i16> @insert_dup_elt1_mem_v16i16_i64(i64* %ptr) {
; KNL-LABEL: insert_dup_elt1_mem_v16i16_i64:
; KNL: ## %bb.0:
; KNL-NEXT: movq (%rdi), %rax
; KNL-NEXT: shrq $16, %rax
; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_elt1_mem_v16i16_i64:
; SKX: ## %bb.0:
; SKX-NEXT: movq (%rdi), %rax
; SKX-NEXT: shrq $16, %rax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i16> %splat
}
|
||||
|
||||
; 512-bit variant. Splats i16 element 3 (the top 16 bits of the loaded i64)
; across a <32 x i16> result. Both runs narrow the load to movzwl 6(%rdi) but
; still broadcast from a register rather than from memory.
; NOTE(review) the name says v16i16 although the result type is <32 x i16>;
; renaming would also require regenerating the label assertions.
define <32 x i16> @insert_dup_elt3_mem_v16i16_i64(i64* %ptr) {
; KNL-LABEL: insert_dup_elt3_mem_v16i16_i64:
; KNL: ## %bb.0:
; KNL-NEXT: movzwl 6(%rdi), %eax
; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_elt3_mem_v16i16_i64:
; SKX: ## %bb.0:
; SKX-NEXT: movzwl 6(%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <32 x i16> %splat
}
|
||||
|
||||
; 512-bit variant. The i64 load goes into lane 1 of the zeroed <2 x i64>, so
; i16 element 7 is the top 16 bits of the loaded value; it is splatted across
; a <32 x i16> result. Both runs read it with movzwl 6(%rdi) and broadcast
; from a register.
; NOTE(review) the name says v16i16 although the result type is <32 x i16>;
; renaming would also require regenerating the label assertions.
define <32 x i16> @insert_dup_elt7_mem_v16i16_i64(i64* %ptr) {
; KNL-LABEL: insert_dup_elt7_mem_v16i16_i64:
; KNL: ## %bb.0:
; KNL-NEXT: movzwl 6(%rdi), %eax
; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_elt7_mem_v16i16_i64:
; SKX: ## %bb.0:
; SKX-NEXT: movzwl 6(%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
  %wide = load i64, i64* %ptr, align 4
  %vec = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <32 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <32 x i16> %splat
}
|
||||
|
||||
; 512-bit variant. An i16 load is sign-extended to i64, placed in lane 0 of a
; zeroed <2 x i64>, and its low i16 element is splatted across a <32 x i16>
; result. Both runs use movzwl plus a register broadcast instead of a
; memory-operand vpbroadcastw.
; NOTE(review) the name says v16i16 although the result type is <32 x i16>;
; renaming would also require regenerating the label assertions.
define <32 x i16> @insert_dup_mem_v16i16_sext_i16_i64(i16* %ptr) {
; KNL-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
; KNL: ## %bb.0:
; KNL-NEXT: movzwl (%rdi), %eax
; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_dup_mem_v16i16_sext_i16_i64:
; SKX: ## %bb.0:
; SKX-NEXT: movzwl (%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
  %narrow = load i16, i16* %ptr, align 2
  %ext = sext i16 %narrow to i64
  %vec = insertelement <2 x i64> zeroinitializer, i64 %ext, i32 0
  %halves = bitcast <2 x i64> %vec to <8 x i16>
  %splat = shufflevector <8 x i16> %halves, <8 x i16> undef, <32 x i32> zeroinitializer
  ret <32 x i16> %splat
}
|
||||
|
||||
define <32 x i16> @shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz(<32 x i16> %a) {
|
||||
; KNL-LABEL: shuffle_v32i16_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
|
||||
; KNL: ## %bb.0:
|
||||
|
|
Loading…
Reference in New Issue