[X86] Use v4i32 vzloads instead of v2i64 for vpmovzx/vpmovsx patterns where only 32-bits are loaded.

v2i64 vzload defines a 64-bit memory access. It doesn't look like
we have any coverage for this either way.

Also remove some vzload usages where the instruction loads only
16-bits.

llvm-svn: 364851
This commit is contained in:
Craig Topper 2019-07-01 21:25:11 +00:00
parent fa27500676
commit 3f722d40c5
3 changed files with 7 additions and 9 deletions

View File

@ -9738,15 +9738,13 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
@ -9761,7 +9759,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
@ -9785,7 +9783,7 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

View File

@ -939,6 +939,8 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
def vzload_v4i32 : PatFrag<(ops node:$src),
(bitconvert (v4i32 (X86vzload node:$src)))>;
def vzload_v2i64 : PatFrag<(ops node:$src),
(bitconvert (v2i64 (X86vzload node:$src)))>;

View File

@ -5031,15 +5031,13 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
@ -5054,7 +5052,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;