From 906f3971374af132ff052af3a3d6cc56b949738f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 8 May 2016 23:08:40 +0000
Subject: [PATCH] [AVX512] Change predicates on some vXi16/vXi8 AVX store
 patterns so they stay enabled unless VLX and BWI instructions are supported.

Without this we could fail instruction selection if VLX was enabled, but
BWI wasn't.

llvm-svn: 268885
---
 llvm/lib/Target/X86/X86InstrSSE.td | 38 +++++++++++++++++-------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 814247899cee..52b288926ef0 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -965,18 +965,10 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVAPSmr addr:$dst, VR128:$src)>;
   def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
             (VMOVAPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
-            (VMOVAPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
-            (VMOVAPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (v2i64 VR128:$src), addr:$dst),
             (VMOVUPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (v4i32 VR128:$src), addr:$dst),
             (VMOVUPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
-            (VMOVUPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
-            (VMOVUPSmr addr:$dst, VR128:$src)>;
 
   // 256-bit load/store
   def : Pat<(alignedloadv4i64 addr:$src),
@@ -987,18 +979,10 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVAPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
             (VMOVAPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
-            (VMOVAPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
-            (VMOVAPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(store (v4i64 VR256:$src), addr:$dst),
             (VMOVUPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(store (v8i32 VR256:$src), addr:$dst),
             (VMOVUPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
-            (VMOVUPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
-            (VMOVUPSYmr addr:$dst, VR256:$src)>;
 
   // Special patterns for storing subvector extracts of lower 128-bits
   // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
@@ -1041,6 +1025,28 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
 }
 
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+  // 128-bit store
+  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+  // 256-bit store
+  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+            (VMOVAPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+            (VMOVAPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+            (VMOVUPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+            (VMOVUPSYmr addr:$dst, VR256:$src)>;
+}
+
 // Use movaps / movups for SSE integer load / store (one byte shorter).
 // The instructions selected below are then converted to MOVDQA/MOVDQU
 // during the SSE domain pass.
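
For context, the change swaps the [HasAVX, NoVLX] guard for [HasAVX, NoVLX_Or_NoBWI]
on the byte/word store patterns. A minimal sketch of how these predicates are defined
in llvm/lib/Target/X86/X86InstrInfo.td (quoted from memory of this era of the tree,
so treat the exact spelling as an assumption):

  // Keeps a pattern live whenever the subtarget lacks VLX.
  def NoVLX          : Predicate<"!Subtarget->hasVLX()">;
  // Keeps a pattern live unless the subtarget has BOTH VLX and BWI.
  def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;

Under the old guard the VEX-encoded v8i16/v16i8 (and v16i16/v32i8) store patterns went
dead as soon as VLX was present, but the EVEX byte/word stores intended to replace them
(VMOVDQU8/VMOVDQU16) additionally require BWI. A subtarget with VLX but not BWI
(e.g. llc -mattr=+avx512vl,-avx512bw) was therefore left with no pattern matching a
plain v8i16 or v16i8 store, and instruction selection would fail. The new guard keeps
the VEX patterns selectable until both features are available.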