From dce9c2a8119c7eb3dd34bc8178e54dd609e6d785 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 19 Feb 2019 17:23:55 +0000
Subject: [PATCH] [X86][AVX2] Hide VPBLENDD instructions behind AVX2 predicate

This was the cause of the regression in D57888 - the commuted load pattern wasn't hidden by the predicate so once we enabled v4i32 blends on SSE41+ targets then isel was incorrectly matched against AVX2+ instructions.

llvm-svn: 354358
---
 llvm/lib/Target/X86/X86InstrSSE.td | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index a55b1489e9e5..c37f12274384 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7753,12 +7753,14 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                             (commuteXForm imm:$src3))>;
 }
 
+let Predicates = [HasAVX2] in {
 defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                                SchedWriteBlend.XMM, VR128, i128mem,
                                BlendCommuteImm4>;
 defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                 SchedWriteBlend.YMM, VR256, i256mem,
                                 BlendCommuteImm8>, VEX_L;
+}
 
 // For insertion into the zero index (low half) of a 256-bit vector, it is
 // more efficient to generate a blend with immediate instead of an insert*128.