From e37cdbeeabfb17821b9ff5d2f42e9f440882dab8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 24 Jan 2020 11:16:16 +0000
Subject: [PATCH] [X86][SSE] Add shufps+shufps test for fold through
 commutation

As mentioned on D73023, lowerShuffleWithSHUFPS should be able to commute the shufps inputs so that the loaded operand can be folded as the second argument, since the shufps result is permuted afterwards anyway.
---
 .../test/CodeGen/X86/vector-shuffle-128-v4.ll | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 4b012e73f9cb..0462caf23fee 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   ret <4 x float> %shuffle
 }
+
+define  <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
+; SSE-LABEL: shuffle_mem_v4f32_0624:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps (%rdi), %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vmovaps (%rdi), %xmm1
+; AVX1OR2-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
+; AVX1OR2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovaps (%rdi), %xmm2
+; AVX512VL-NEXT:    vmovaps {{.*#+}} xmm1 = [0,6,2,4]
+; AVX512VL-NEXT:    vpermi2ps %xmm0, %xmm2, %xmm1
+; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512VL-NEXT:    retq
+  %1 = load <4 x float>, <4 x float>* %a1 ; shuffle operand 0 comes from memory (mask indices 0-3 select from %1)
+  %2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4> ; result = <%1[0], %a0[2], %1[2], %a0[0]> (mask indices 4-7 select from %a0)
+  ret <4 x float> %2
+}