[AVX512] Change predicates on some vXi16/vXi8 AVX store patterns so they stay enabled unless VLX and BWI instructions are supported.

Without this we could fail instruction selection if VLX was enabled but BWI wasn't: the old [HasAVX, NoVLX] guard disabled these AVX store patterns as soon as VLX was present, while the AVX512VL replacements for vXi16/vXi8 stores also require BWI, leaving those stores with no enabled pattern.

llvm-svn: 268885
commit 906f397137
parent e5ce84a33c
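The failure mode is easy to sketch. Below is a hypothetical reproducer for the X86InstrSSE.td patterns changed here (not a test shipped with this commit; the RUN line and function names are illustrative), written in 2016-era typed-pointer LLVM IR:

; Hypothetical reproducer: with VLX enabled but BWI disabled, the pre-fix
; predicates left no enabled store pattern for v8i16/v32i8, so instruction
; selection could fail; with this change the AVX patterns stay enabled and
; these stores select vmovaps / vmovups.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-avx512bw

define void @store_v8i16_aligned(<8 x i16> %x, <8 x i16>* %p) {
  store <8 x i16> %x, <8 x i16>* %p, align 16   ; alignedstore -> VMOVAPSmr
  ret void
}

define void @store_v32i8_unaligned(<32 x i8> %x, <32 x i8>* %p) {
  store <32 x i8> %x, <32 x i8>* %p, align 1    ; store -> VMOVUPSYmr
  ret void
}

The new NoVLX_Or_NoBWI predicate keeps these AVX patterns alive whenever either VLX or BWI is missing, so they are only disabled in the one configuration where the EVEX vmovdqu8/vmovdqu16 forms, which need both features, can take over.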
@@ -965,18 +965,10 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVAPSmr addr:$dst, VR128:$src)>;
   def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
             (VMOVAPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
-            (VMOVAPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
-            (VMOVAPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (v2i64 VR128:$src), addr:$dst),
             (VMOVUPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (v4i32 VR128:$src), addr:$dst),
             (VMOVUPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
-            (VMOVUPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
-            (VMOVUPSmr addr:$dst, VR128:$src)>;

   // 256-bit load/store
   def : Pat<(alignedloadv4i64 addr:$src),
@@ -987,18 +979,10 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVAPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
             (VMOVAPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
-            (VMOVAPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
-            (VMOVAPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(store (v4i64 VR256:$src), addr:$dst),
             (VMOVUPSYmr addr:$dst, VR256:$src)>;
   def : Pat<(store (v8i32 VR256:$src), addr:$dst),
             (VMOVUPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
-            (VMOVUPSYmr addr:$dst, VR256:$src)>;
-  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
-            (VMOVUPSYmr addr:$dst, VR256:$src)>;

   // Special patterns for storing subvector extracts of lower 128-bits
   // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
@@ -1041,6 +1025,28 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
 }

+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+  // 128-bit load/store
+  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+            (VMOVAPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+            (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+  // 256-bit load/store
+  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+            (VMOVAPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+            (VMOVAPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+            (VMOVUPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+            (VMOVUPSYmr addr:$dst, VR256:$src)>;
+}
+
 // Use movaps / movups for SSE integer load / store (one byte shorter).
 // The instructions selected below are then converted to MOVDQA/MOVDQU
 // during the SSE domain pass.