From e37cdbeeabfb17821b9ff5d2f42e9f440882dab8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 24 Jan 2020 11:16:16 +0000
Subject: [PATCH] [X86][SSE] Add shufps+shufps test for fold through commutation

As mentioned on D73023, lowerShuffleWithSHUFPS should be able to commute
the shufps inputs to fold the second arg as it will then permute the
shufps result anyway.
---
 .../test/CodeGen/X86/vector-shuffle-128-v4.ll | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 4b012e73f9cb..0462caf23fee 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   ret <4 x float> %shuffle
 }
+
+define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
+; SSE-LABEL: shuffle_mem_v4f32_0624:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps (%rdi), %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vmovaps (%rdi), %xmm1
+; AVX1OR2-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
+; AVX1OR2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovaps (%rdi), %xmm2
+; AVX512VL-NEXT:    vmovaps {{.*#+}} xmm1 = [0,6,2,4]
+; AVX512VL-NEXT:    vpermi2ps %xmm0, %xmm2, %xmm1
+; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512VL-NEXT:    retq
+  %1 = load <4 x float>, <4 x float>* %a1
+  %2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+  ret <4 x float> %2
+}