[X86][SSE] Add shufps+shufps test for fold through commutation

As mentioned on D73023, lowerShuffleWithSHUFPS should be able to commute the shufps inputs to fold the second arg as it will then permute the shufps result anyway.
This commit is contained in:
Simon Pilgrim 2020-01-24 11:16:16 +00:00
parent d4b092b341
commit e37cdbeeab
1 changed files with 28 additions and 0 deletions

View File

@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
; SSE-LABEL: shuffle_mem_v4f32_0624:
; SSE: # %bb.0:
; SSE-NEXT: movaps (%rdi), %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vmovaps (%rdi), %xmm1
; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovaps (%rdi), %xmm2
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,2,4]
; AVX512VL-NEXT: vpermi2ps %xmm0, %xmm2, %xmm1
; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
; AVX512VL-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a1
%2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
ret <4 x float> %2
}