forked from OSchip/llvm-project
Move MOVAPS,MOVUPS patterns close to the instructions definition
llvm-svn: 138896
This commit is contained in:
parent
941001312a
commit
21a180367b
|
@ -696,7 +696,98 @@ let Predicates = [HasSSE2] in
|
||||||
def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
|
def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
|
||||||
(MOVUPDmr addr:$dst, VR128:$src)>;
|
(MOVUPDmr addr:$dst, VR128:$src)>;
|
||||||
|
|
||||||
// Move Low/High packed floating point values
|
// Use movaps / movups for SSE integer load / store (one byte shorter).
|
||||||
|
// The instructions selected below are then converted to MOVDQA/MOVDQU
|
||||||
|
// during the SSE domain pass.
|
||||||
|
let Predicates = [HasSSE1] in {
|
||||||
|
def : Pat<(alignedloadv4i32 addr:$src),
|
||||||
|
(MOVAPSrm addr:$src)>;
|
||||||
|
def : Pat<(loadv4i32 addr:$src),
|
||||||
|
(MOVUPSrm addr:$src)>;
|
||||||
|
def : Pat<(alignedloadv2i64 addr:$src),
|
||||||
|
(MOVAPSrm addr:$src)>;
|
||||||
|
def : Pat<(loadv2i64 addr:$src),
|
||||||
|
(MOVUPSrm addr:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
|
||||||
|
(MOVAPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
|
||||||
|
(MOVAPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
|
||||||
|
(MOVAPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
|
||||||
|
(MOVAPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
|
||||||
|
(MOVUPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
|
||||||
|
(MOVUPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
|
||||||
|
(MOVUPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
||||||
|
(MOVUPSmr addr:$dst, VR128:$src)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use vmovaps/vmovups for AVX integer load/store.
|
||||||
|
let Predicates = [HasAVX] in {
|
||||||
|
// 128-bit load/store
|
||||||
|
def : Pat<(alignedloadv4i32 addr:$src),
|
||||||
|
(VMOVAPSrm addr:$src)>;
|
||||||
|
def : Pat<(loadv4i32 addr:$src),
|
||||||
|
(VMOVUPSrm addr:$src)>;
|
||||||
|
def : Pat<(alignedloadv2i64 addr:$src),
|
||||||
|
(VMOVAPSrm addr:$src)>;
|
||||||
|
def : Pat<(loadv2i64 addr:$src),
|
||||||
|
(VMOVUPSrm addr:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
|
||||||
|
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
|
||||||
|
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
|
||||||
|
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
|
||||||
|
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
|
||||||
|
(VMOVUPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
|
||||||
|
(VMOVUPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
|
||||||
|
(VMOVUPSmr addr:$dst, VR128:$src)>;
|
||||||
|
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
||||||
|
(VMOVUPSmr addr:$dst, VR128:$src)>;
|
||||||
|
|
||||||
|
// 256-bit load/store
|
||||||
|
def : Pat<(alignedloadv4i64 addr:$src),
|
||||||
|
(VMOVAPSYrm addr:$src)>;
|
||||||
|
def : Pat<(loadv4i64 addr:$src),
|
||||||
|
(VMOVUPSYrm addr:$src)>;
|
||||||
|
def : Pat<(alignedloadv8i32 addr:$src),
|
||||||
|
(VMOVAPSYrm addr:$src)>;
|
||||||
|
def : Pat<(loadv8i32 addr:$src),
|
||||||
|
(VMOVUPSYrm addr:$src)>;
|
||||||
|
def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
|
||||||
|
(VMOVAPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
|
||||||
|
(VMOVAPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
|
||||||
|
(VMOVAPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
|
||||||
|
(VMOVAPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
def : Pat<(store (v4i64 VR256:$src), addr:$dst),
|
||||||
|
(VMOVUPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
def : Pat<(store (v8i32 VR256:$src), addr:$dst),
|
||||||
|
(VMOVUPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
def : Pat<(store (v16i16 VR256:$src), addr:$dst),
|
||||||
|
(VMOVUPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
def : Pat<(store (v32i8 VR256:$src), addr:$dst),
|
||||||
|
(VMOVUPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// SSE 1 & 2 - Move Low/High packed FP Instructions
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
|
multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
|
||||||
PatFrag mov_frag, string base_opc,
|
PatFrag mov_frag, string base_opc,
|
||||||
string asm_opr> {
|
string asm_opr> {
|
||||||
|
@ -4584,93 +4675,6 @@ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||||
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
|
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
|
||||||
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
|
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
// Use movaps / movups for SSE integer load / store (one byte shorter).
|
|
||||||
// The instructions selected below are then converted to MOVDQA/MOVDQU
|
|
||||||
// during the SSE domain pass.
|
|
||||||
let Predicates = [HasSSE1] in {
|
|
||||||
def : Pat<(alignedloadv4i32 addr:$src),
|
|
||||||
(MOVAPSrm addr:$src)>;
|
|
||||||
def : Pat<(loadv4i32 addr:$src),
|
|
||||||
(MOVUPSrm addr:$src)>;
|
|
||||||
def : Pat<(alignedloadv2i64 addr:$src),
|
|
||||||
(MOVAPSrm addr:$src)>;
|
|
||||||
def : Pat<(loadv2i64 addr:$src),
|
|
||||||
(MOVUPSrm addr:$src)>;
|
|
||||||
|
|
||||||
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
|
|
||||||
(MOVAPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
|
|
||||||
(MOVAPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
|
|
||||||
(MOVAPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
|
|
||||||
(MOVAPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
|
|
||||||
(MOVUPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
|
|
||||||
(MOVUPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
|
|
||||||
(MOVUPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
|
||||||
(MOVUPSmr addr:$dst, VR128:$src)>;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use vmovaps/vmovups for AVX integer load/store.
|
|
||||||
let Predicates = [HasAVX] in {
|
|
||||||
// 128-bit load/store
|
|
||||||
def : Pat<(alignedloadv4i32 addr:$src),
|
|
||||||
(VMOVAPSrm addr:$src)>;
|
|
||||||
def : Pat<(loadv4i32 addr:$src),
|
|
||||||
(VMOVUPSrm addr:$src)>;
|
|
||||||
def : Pat<(alignedloadv2i64 addr:$src),
|
|
||||||
(VMOVAPSrm addr:$src)>;
|
|
||||||
def : Pat<(loadv2i64 addr:$src),
|
|
||||||
(VMOVUPSrm addr:$src)>;
|
|
||||||
|
|
||||||
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
|
|
||||||
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
|
|
||||||
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
|
|
||||||
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
|
|
||||||
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
|
|
||||||
(VMOVUPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
|
|
||||||
(VMOVUPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
|
|
||||||
(VMOVUPSmr addr:$dst, VR128:$src)>;
|
|
||||||
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
|
||||||
(VMOVUPSmr addr:$dst, VR128:$src)>;
|
|
||||||
|
|
||||||
// 256-bit load/store
|
|
||||||
def : Pat<(alignedloadv4i64 addr:$src),
|
|
||||||
(VMOVAPSYrm addr:$src)>;
|
|
||||||
def : Pat<(loadv4i64 addr:$src),
|
|
||||||
(VMOVUPSYrm addr:$src)>;
|
|
||||||
def : Pat<(alignedloadv8i32 addr:$src),
|
|
||||||
(VMOVAPSYrm addr:$src)>;
|
|
||||||
def : Pat<(loadv8i32 addr:$src),
|
|
||||||
(VMOVUPSYrm addr:$src)>;
|
|
||||||
def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
|
|
||||||
(VMOVAPSYmr addr:$dst, VR256:$src)>;
|
|
||||||
def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
|
|
||||||
(VMOVAPSYmr addr:$dst, VR256:$src)>;
|
|
||||||
def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst),
|
|
||||||
(VMOVAPSYmr addr:$dst, VR256:$src)>;
|
|
||||||
def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst),
|
|
||||||
(VMOVAPSYmr addr:$dst, VR256:$src)>;
|
|
||||||
def : Pat<(store (v4i64 VR256:$src), addr:$dst),
|
|
||||||
(VMOVUPSYmr addr:$dst, VR256:$src)>;
|
|
||||||
def : Pat<(store (v8i32 VR256:$src), addr:$dst),
|
|
||||||
(VMOVUPSYmr addr:$dst, VR256:$src)>;
|
|
||||||
def : Pat<(store (v16i16 VR256:$src), addr:$dst),
|
|
||||||
(VMOVUPSYmr addr:$dst, VR256:$src)>;
|
|
||||||
def : Pat<(store (v32i8 VR256:$src), addr:$dst),
|
|
||||||
(VMOVUPSYmr addr:$dst, VR256:$src)>;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SSE4.1 - Packed Move with Sign/Zero Extend
|
// SSE4.1 - Packed Move with Sign/Zero Extend
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
Loading…
Reference in New Issue