From dce9c2a8119c7eb3dd34bc8178e54dd609e6d785 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 19 Feb 2019 17:23:55 +0000
Subject: [PATCH] [X86][AVX2] Hide VPBLENDD instructions behind AVX2 predicate

This was the cause of the regression in D57888 - the commuted load pattern wasn't hidden by the predicate so once we enabled v4i32 blends on SSE41+ targets then isel was incorrectly matched against AVX2+ instructions.

llvm-svn: 354358
---
 llvm/lib/Target/X86/X86InstrSSE.td | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index a55b1489e9e5..c37f12274384 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7753,12 +7753,14 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                             (commuteXForm imm:$src3))>;
 }
 
+let Predicates = [HasAVX2] in {
 defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                                SchedWriteBlend.XMM, VR128, i128mem,
                                BlendCommuteImm4>;
 defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                 SchedWriteBlend.YMM, VR256, i256mem,
                                 BlendCommuteImm8>, VEX_L;
+}
 
 // For insertion into the zero index (low half) of a 256-bit vector, it is
 // more efficient to generate a blend with immediate instead of an insert*128.