From 427404c769c60c8508d02c1050c85e4ac355c648 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 22 Feb 2019 00:04:35 +0000
Subject: [PATCH] [X86] Fix some copy/paste mistakes that caused a VR128 to be
 used as the address of a load in an isel pattern

The mistakes were introduced in r354511.

Fixes PR40811.

llvm-svn: 354640
---
 llvm/lib/Target/X86/X86InstrSSE.td |  8 ++++----
 llvm/test/CodeGen/X86/pr40811.ll   | 17 +++++++++++++++++
 2 files changed, 21 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/pr40811.ll

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index d5a6273088f5..e61666781b0c 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -6535,9 +6535,9 @@ def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
 
 def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
           (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(X86Blendi VR128:$src1, (loadv4i32 VR128:$src2), imm:$src3),
+def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
           (VBLENDPSrmi VR128:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(X86Blendi (loadv4i32 VR128:$src2), VR128:$src1, imm:$src3),
+def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
           (VBLENDPSrmi VR128:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
 }
 
@@ -6562,9 +6562,9 @@ def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
 def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
           (BLENDPSrri VR128:$src1, VR128:$src2, imm:$src3)>;
 def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
-          (BLENDPSrmi VR128:$src1,addr:$src2, imm:$src3)>;
+          (BLENDPSrmi VR128:$src1, addr:$src2, imm:$src3)>;
 def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
-          (BLENDPSrmi VR128:$src1,addr:$src2, (BlendCommuteImm4 imm:$src3))>;
+          (BLENDPSrmi VR128:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
 }
 
 // For insertion into the zero index (low half) of a 256-bit vector, it is
diff --git a/llvm/test/CodeGen/X86/pr40811.ll b/llvm/test/CodeGen/X86/pr40811.ll
new file mode 100644
index 000000000000..fca947ad4c70
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr40811.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -o - -mcpu=btver2 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; Regression test for PR40811: the expected vblendps takes one of its sources from memory (mem[1]).
+define <8 x i32> @_Z6test70v(<4 x i32>* %id14793) {
+; CHECK-LABEL: _Z6test70v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovaps (%rdi), %xmm0
+; CHECK-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2,3]
+; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,1,0]
+; CHECK-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,0]
+; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+entry:
+  %id14793.0.id14793.0. = load <4 x i32>, <4 x i32>* %id14793, align 16
+  %shuffle = shufflevector <4 x i32> %id14793.0.id14793.0., <4 x i32> <i32 undef, i32 -1052558824, i32 undef, i32 undef>, <8 x i32> <i32 0, i32 2, i32 5, i32 0, i32 1, i32 3, i32 1, i32 0>
+  ret <8 x i32> %shuffle
+}