[X86][SSE] Add test case for PR45604
parent 46de0d5fe9
commit d6db919bee
@@ -3026,3 +3026,187 @@ define void @PR43024() {
  store float %8, float* undef, align 8
  ret void
}

; TODO - we're ignoring the i32->i16->i32 'ZERO_EXTEND_INREG' pattern, resulting in a bad movss.
define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) {
; SSE2-LABEL: PR45604:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rsi), %xmm1
; SSE2-NEXT: pextrw $2, %xmm1, %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: movl $11, %eax
; SSE2-NEXT: pinsrw $2, %eax, %xmm0
; SSE2-NEXT: pextrw $3, %xmm1, %ecx
; SSE2-NEXT: pinsrw $4, %ecx, %xmm0
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
; SSE2-NEXT: pextrw $4, %xmm1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: pinsrw $2, %eax, %xmm2
; SSE2-NEXT: pextrw $5, %xmm1, %ecx
; SSE2-NEXT: pinsrw $4, %ecx, %xmm2
; SSE2-NEXT: pinsrw $6, %eax, %xmm2
; SSE2-NEXT: pextrw $6, %xmm1, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: pinsrw $2, %eax, %xmm3
; SSE2-NEXT: pextrw $7, %xmm1, %ecx
; SSE2-NEXT: pinsrw $4, %ecx, %xmm3
; SSE2-NEXT: pinsrw $6, %eax, %xmm3
; SSE2-NEXT: xorps %xmm4, %xmm4
; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm1[0],xmm4[1,2,3]
; SSE2-NEXT: pinsrw $2, %eax, %xmm4
; SSE2-NEXT: pextrw $1, %xmm1, %ecx
; SSE2-NEXT: pinsrw $4, %ecx, %xmm4
; SSE2-NEXT: pinsrw $6, %eax, %xmm4
; SSE2-NEXT: movdqa %xmm4, (%rdi)
; SSE2-NEXT: movdqa %xmm3, 48(%rdi)
; SSE2-NEXT: movdqa %xmm2, 32(%rdi)
; SSE2-NEXT: movdqa %xmm0, 16(%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR45604:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa (%rsi), %xmm1
; SSSE3-NEXT: pextrw $2, %xmm1, %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: movl $11, %eax
; SSSE3-NEXT: pinsrw $2, %eax, %xmm0
; SSSE3-NEXT: pextrw $3, %xmm1, %ecx
; SSSE3-NEXT: pinsrw $4, %ecx, %xmm0
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
; SSSE3-NEXT: pextrw $4, %xmm1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: pinsrw $2, %eax, %xmm2
; SSSE3-NEXT: pextrw $5, %xmm1, %ecx
; SSSE3-NEXT: pinsrw $4, %ecx, %xmm2
; SSSE3-NEXT: pinsrw $6, %eax, %xmm2
; SSSE3-NEXT: pextrw $6, %xmm1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: pinsrw $2, %eax, %xmm3
; SSSE3-NEXT: pextrw $7, %xmm1, %ecx
; SSSE3-NEXT: pinsrw $4, %ecx, %xmm3
; SSSE3-NEXT: pinsrw $6, %eax, %xmm3
; SSSE3-NEXT: xorps %xmm4, %xmm4
; SSSE3-NEXT: movss {{.*#+}} xmm4 = xmm1[0],xmm4[1,2,3]
; SSSE3-NEXT: pinsrw $2, %eax, %xmm4
; SSSE3-NEXT: pextrw $1, %xmm1, %ecx
; SSSE3-NEXT: pinsrw $4, %ecx, %xmm4
; SSSE3-NEXT: pinsrw $6, %eax, %xmm4
; SSSE3-NEXT: movdqa %xmm4, (%rdi)
; SSSE3-NEXT: movdqa %xmm3, 48(%rdi)
; SSSE3-NEXT: movdqa %xmm2, 32(%rdi)
; SSSE3-NEXT: movdqa %xmm0, 16(%rdi)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR45604:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rsi), %xmm1
; SSE41-NEXT: pextrw $2, %xmm1, %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: movl $11, %eax
; SSE41-NEXT: pinsrw $2, %eax, %xmm0
; SSE41-NEXT: pextrw $3, %xmm1, %ecx
; SSE41-NEXT: pinsrw $4, %ecx, %xmm0
; SSE41-NEXT: pinsrw $6, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm1, %ecx
; SSE41-NEXT: movd %ecx, %xmm2
; SSE41-NEXT: pinsrw $2, %eax, %xmm2
; SSE41-NEXT: pextrw $5, %xmm1, %ecx
; SSE41-NEXT: pinsrw $4, %ecx, %xmm2
; SSE41-NEXT: pinsrw $6, %eax, %xmm2
; SSE41-NEXT: pextrw $6, %xmm1, %ecx
; SSE41-NEXT: movd %ecx, %xmm3
; SSE41-NEXT: pinsrw $2, %eax, %xmm3
; SSE41-NEXT: pextrw $7, %xmm1, %ecx
; SSE41-NEXT: pinsrw $4, %ecx, %xmm3
; SSE41-NEXT: pinsrw $6, %eax, %xmm3
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm1[0,1],xmm4[2,3,4,5,6,7]
; SSE41-NEXT: pinsrw $2, %eax, %xmm4
; SSE41-NEXT: pextrw $1, %xmm1, %ecx
; SSE41-NEXT: pinsrw $4, %ecx, %xmm4
; SSE41-NEXT: pinsrw $6, %eax, %xmm4
; SSE41-NEXT: movdqa %xmm4, (%rdi)
; SSE41-NEXT: movdqa %xmm3, 48(%rdi)
; SSE41-NEXT: movdqa %xmm2, 32(%rdi)
; SSE41-NEXT: movdqa %xmm0, 16(%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: PR45604:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rsi), %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [11,11,11,0,11,11,11,0]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vmovups %ymm1, 32(%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: PR45604:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovdqa (%rsi), %xmm0
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,0,0,0,0,0,0,0,0]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm1[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,0,2,1,4,4,6,5]
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <u,u,u,u,255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255>
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm4 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11]
; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm2, %ymm4, %ymm2
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm5 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,1,3,4,5,5,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[2,3,0,1]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm7 = ymm6[2,0,3,1,4,5,6,7,10,8,11,9,12,13,14,15]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm7 = ymm7[0,1,1,3,4,5,5,7]
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm8 = <255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255,u,u,u,u>
; AVX2-SLOW-NEXT: vpblendvb %ymm8, %ymm5, %ymm7, %ymm5
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,3,3,6,5,7,7]
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm3 = ymm6[0,1,2,3,6,4,7,5,8,9,10,11,14,12,15,13]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[2,1,3,3,6,5,7,7]
; AVX2-SLOW-NEXT: vpblendvb %ymm8, %ymm0, %ymm3, %ymm0
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm5[0],ymm2[1],ymm5[2],ymm2[3],ymm5[4],ymm2[5],ymm5[6],ymm2[7]
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-SLOW-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-SLOW-NEXT: vmovdqu %ymm2, (%rdi)
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: PR45604:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovdqa (%rsi), %xmm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,4,5,2,3,6,7,2,3,6,7,12,13,14,15,16,17,20,21,18,19,22,23,18,19,22,23,28,29,30,31]
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[4,5,0,1,6,7,2,3,6,7,2,3,12,13,14,15,20,21,16,17,22,23,18,19,22,23,18,19,28,29,30,31]
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = <u,u,u,u,255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255>
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11]
; AVX2-FAST-NEXT: vpblendvb %ymm4, {{.*}}(%rip), %ymm5, %ymm4
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = <255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255,u,u,u,u>
; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm1, %ymm3, %ymm1
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,12,13,4,5,6,7,10,11,14,15,10,11,14,15,24,25,28,29,20,21,22,23,26,27,30,31,26,27,30,31]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[12,13,8,9,4,5,6,7,14,15,10,11,14,15,10,11,28,29,24,25,20,21,22,23,30,31,26,27,30,31,26,27]
; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm4[1],ymm1[2],ymm4[3],ymm1[4],ymm4[5],ymm1[6],ymm4[7]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm4[1],ymm0[2],ymm4[3],ymm0[4],ymm4[5],ymm0[6],ymm4[7]
; AVX2-FAST-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-FAST-NEXT: vmovdqu %ymm1, (%rdi)
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
  %v1 = load <8 x i16>, <8 x i16>* %src, align 16
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v3 = shufflevector <16 x i16> %v2, <16 x i16> <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
  store <32 x i16> %v3, <32 x i16>* %dst, align 16
  ret void
}
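
For reference, the IR in the test concatenates the loaded <8 x i16> with zeros and then interleaves the result with a constant splat of 11: source element i lands in result lane 4*i, followed by a zero, an 11, and another zero. A minimal scalar sketch of that store pattern is given below in C; it is illustrative only, and the function name is hypothetical rather than part of the test.

#include <stdint.h>

/* Scalar sketch of what the PR45604 shufflevector sequence computes:
   dst is the <32 x i16> destination, src the <8 x i16> source.      */
void pr45604_reference(uint16_t dst[32], const uint16_t src[8]) {
  for (int i = 0; i < 8; ++i) {
    dst[4 * i + 0] = src[i]; /* lane i of the loaded vector                  */
    dst[4 * i + 1] = 0;      /* zero half of the first shufflevector         */
    dst[4 * i + 2] = 11;     /* low half of the constant <16 x i16> operand  */
    dst[4 * i + 3] = 0;      /* zero half of the constant <16 x i16> operand */
  }
}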