forked from OSchip/llvm-project
[X86][SSE] Add shufps+shufps test for fold through commutation
As mentioned in D73023, lowerShuffleWithSHUFPS should be able to commute the shufps inputs so that the second argument can be folded, since the shufps result is then permuted anyway.
This commit is contained in:
parent
d4b092b341
commit
e37cdbeeab
|
@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
|
|||
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
ret <4 x float> %shuffle
|
||||
}
|
||||
|
||||
; Shuffle of a <4 x float> loaded from %a1 with the register argument %a0,
; using mask <0, 6, 2, 4>: lanes 0 and 2 of the result come from the loaded
; vector (elements 0 and 2), lanes 1 and 3 come from %a0 (elements 2 and 0).
; The checks below record the current lowering, in which the load is emitted
; as a separate vector move rather than folded into the shuffle: two shuffles
; for the SSE and AVX1OR2 runs, and a vpermi2ps with a [0,6,2,4] index vector
; for the AVX512VL run.
; NOTE(review): the accompanying commit description says lowerShuffleWithSHUFPS
; should be able to commute the shufps inputs so the second argument can be
; folded; these checks presumably need regenerating once that lands - confirm.
define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
|
||||
; SSE-LABEL: shuffle_mem_v4f32_0624:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movaps (%rdi), %xmm1
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
|
||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
|
||||
; AVX1OR2: # %bb.0:
|
||||
; AVX1OR2-NEXT: vmovaps (%rdi), %xmm1
|
||||
; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
|
||||
; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
|
||||
; AVX1OR2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovaps (%rdi), %xmm2
|
||||
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,2,4]
|
||||
; AVX512VL-NEXT: vpermi2ps %xmm0, %xmm2, %xmm1
|
||||
; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%1 = load <4 x float>, <4 x float>* %a1
|
||||
%2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
|
||||
ret <4 x float> %2
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue