AVX-512: Implemented encoding, DAG lowering and intrinsics for Integer Truncate with/without saturation
Added tests for DAG lowering, encoding and intrinsics.

Differential Revision: http://reviews.llvm.org/D11218

llvm-svn: 242990
This commit is contained in:
parent ac7947ec32
commit da1b2ea955
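(For orientation, an illustrative note rather than text from the commit: the three instruction families differ only in how out-of-range values are narrowed. Truncating the 16-bit value 0x0180 (384) to 8 bits gives 0x80 with plain truncation (vpmov*, upper bits dropped), 0x7F with signed saturation (vpmovs*, clamped to the i8 maximum 127), and 0xFF with unsigned saturation (vpmovus*, clamped to the u8 maximum 255).)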
@@ -5816,6 +5816,550 @@ let TargetPrefix = "x86" in {
                          llvm_i8_ty], [IntrReadArgMem]>;
 }
 
+// truncate
+let TargetPrefix = "x86" in {
+  def int_x86_avx512_mask_pmov_qb_128 :
+          GCCBuiltin<"__builtin_ia32_pmovqb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qb_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qb_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qb_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qb_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qb_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_qb_256 :
+          GCCBuiltin<"__builtin_ia32_pmovqb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qb_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qb_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qb_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qb_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qb_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_qb_512 :
+          GCCBuiltin<"__builtin_ia32_pmovqb512_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qb_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qb_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qb_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qb_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qb_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_qw_128 :
+          GCCBuiltin<"__builtin_ia32_pmovqw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qw_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qw_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qw_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qw_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qw_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_qw_256 :
+          GCCBuiltin<"__builtin_ia32_pmovqw256_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qw_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qw_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qw_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qw_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qw_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_qw_512 :
+          GCCBuiltin<"__builtin_ia32_pmovqw512_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qw_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qw_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qw_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qw_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qw_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_qd_128 :
+          GCCBuiltin<"__builtin_ia32_pmovqd128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qd_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qd_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qd_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qd_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qd_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_qd_256 :
+          GCCBuiltin<"__builtin_ia32_pmovqd256_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qd_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qd_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qd_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qd_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qd_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_qd_512 :
+          GCCBuiltin<"__builtin_ia32_pmovqd512_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_qd_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_qd_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_qd_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_qd_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_qd_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_db_128 :
+          GCCBuiltin<"__builtin_ia32_pmovdb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_db_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_db_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_db_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_db_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_db_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_db_256 :
+          GCCBuiltin<"__builtin_ia32_pmovdb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_db_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_db_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_db_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_db_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_db_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_db_512 :
+          GCCBuiltin<"__builtin_ia32_pmovdb512_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_db_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_db_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_db_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_db_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_db_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_dw_128 :
+          GCCBuiltin<"__builtin_ia32_pmovdw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_dw_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_dw_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_dw_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_dw_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_dw_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_dw_256 :
+          GCCBuiltin<"__builtin_ia32_pmovdw256_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_dw_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_dw_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_dw_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_dw_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+                    [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_dw_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_dw_512 :
+          GCCBuiltin<"__builtin_ia32_pmovdw512_mask">,
+          Intrinsic<[llvm_v16i16_ty],
+                    [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_dw_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_dw_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">,
+          Intrinsic<[llvm_v16i16_ty],
+                    [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_dw_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_dw_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">,
+          Intrinsic<[llvm_v16i16_ty],
+                    [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_dw_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_wb_128 :
+          GCCBuiltin<"__builtin_ia32_pmovwb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_wb_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_wb_128 :
+          GCCBuiltin<"__builtin_ia32_pmovswb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_wb_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_wb_128 :
+          GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_wb_mem_128 :
+          GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_wb_256 :
+          GCCBuiltin<"__builtin_ia32_pmovwb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_wb_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_wb_256 :
+          GCCBuiltin<"__builtin_ia32_pmovswb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_wb_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_wb_256 :
+          GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+                    [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_wb_mem_256 :
+          GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmov_wb_512 :
+          GCCBuiltin<"__builtin_ia32_pmovwb512_mask">,
+          Intrinsic<[llvm_v32i8_ty],
+                    [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmov_wb_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovs_wb_512 :
+          GCCBuiltin<"__builtin_ia32_pmovswb512_mask">,
+          Intrinsic<[llvm_v32i8_ty],
+                    [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovs_wb_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
+                    [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_pmovus_wb_512 :
+          GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">,
+          Intrinsic<[llvm_v32i8_ty],
+                    [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_mask_pmovus_wb_mem_512 :
+          GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">,
+          Intrinsic<[],
+                    [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
+                    [IntrReadWriteArgMem]>;
+}
+
 // Misc.
 let TargetPrefix = "x86" in {
   def int_x86_avx512_mask_cmp_ps_512 :
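To make the shape of these declarations concrete, here is a minimal LLVM IR sketch (the function name, mask value and passthrough operands are illustrative assumptions; the intrinsic signatures follow the truncate definitions added above: source vector, passthrough, integer mask):

declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)

define <8 x i32> @trunc_qd_example(<8 x i64> %x, <8 x i32> %passthru, i8 %m) {
  ; plain truncation, merge-masked with %passthru under %m
  %t = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x,
                                                        <8 x i32> %passthru, i8 %m)
  ; signed-saturating truncation with an all-ones mask (no masking)
  %s = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x,
                                                         <8 x i32> zeroinitializer, i8 -1)
  %r = add <8 x i32> %t, %s
  ret <8 x i32> %r
}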
@@ -493,9 +493,10 @@ def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
 def atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
                     [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
-def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore,
+// Do not use mld, mst directly. Use masked_store masked_load, masked_truncstore
+def mst : SDNode<"ISD::MSTORE", SDTMaskedStore,
                  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad,
+def mld : SDNode<"ISD::MLOAD", SDTMaskedLoad,
                  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def masked_scatter : SDNode<"ISD::MSCATTER", SDTMaskedScatter,
                      [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -680,6 +681,12 @@ def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
   return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
 }]>;
 
+// masked load fragments.
+def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                          (mld node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+}]>;
+
 // extending load fragments.
 def extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
   return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
@@ -791,6 +798,12 @@ def store : PatFrag<(ops node:$val, node:$ptr),
   return !cast<StoreSDNode>(N)->isTruncatingStore();
 }]>;
 
+// masked store fragments.
+def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                           (mst node:$src1, node:$src2, node:$src3), [{
+  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+
 // truncstore fragments.
 def truncstore : PatFrag<(ops node:$val, node:$ptr),
                          (unindexedstore node:$val, node:$ptr), [{
@@ -817,6 +830,21 @@ def truncstoref64 : PatFrag<(ops node:$val, node:$ptr),
   return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f64;
 }]>;
 
+def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr),
+                             (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr),
+                             (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
 // indexed store fragments.
 def istore : PatFrag<(ops node:$val, node:$base, node:$offset),
                      (ist node:$val, node:$base, node:$offset), [{
@@ -891,6 +919,27 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
   return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32;
 }]>;
 
+// masked truncstore fragments
+def masked_truncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                                (mst node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+def masked_truncstorevi8 :
+  PatFrag<(ops node:$src1, node:$src2, node:$src3),
+          (masked_truncstore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def masked_truncstorevi16 :
+  PatFrag<(ops node:$src1, node:$src2, node:$src3),
+          (masked_truncstore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def masked_truncstorevi32 :
+  PatFrag<(ops node:$src1, node:$src2, node:$src3),
+          (masked_truncstore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
 // setcc convenience fragments.
 def setoeq : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOEQ)>;
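As a sketch of the IR shape the new truncstorevi32 fragment is meant to catch (illustrative function name; once the DAG combiner folds the trunc into the store, the resulting truncating-store node has an i32 scalar memory type, which the AVX-512 truncate patterns added below can select directly):

define void @truncstore_qd(<8 x i64> %x, <8 x i32>* %p) {
  %t = trunc <8 x i64> %x to <8 x i32>
  store <8 x i32> %t, <8 x i32>* %p
  ret void
}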
@@ -1348,6 +1348,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
 
+    setTruncStoreAction(MVT::v8i64,  MVT::v8i8,   Legal);
+    setTruncStoreAction(MVT::v8i64,  MVT::v8i16,  Legal);
+    setTruncStoreAction(MVT::v8i64,  MVT::v8i32,  Legal);
+    setTruncStoreAction(MVT::v16i32, MVT::v16i8,  Legal);
+    setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
+    if (Subtarget->hasVLX()){
+      setTruncStoreAction(MVT::v4i64, MVT::v4i8,  Legal);
+      setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
+      setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
+      setTruncStoreAction(MVT::v8i32, MVT::v8i8,  Legal);
+      setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
+
+      setTruncStoreAction(MVT::v2i64, MVT::v2i8,  Legal);
+      setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
+      setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
+      setTruncStoreAction(MVT::v4i32, MVT::v4i8,  Legal);
+      setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
+    }
     setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
     setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
@@ -1556,6 +1574,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::VSELECT,            MVT::v64i8, Legal);
       setOperationAction(ISD::TRUNCATE,           MVT::v32i1, Custom);
       setOperationAction(ISD::TRUNCATE,           MVT::v64i1, Custom);
+      setOperationAction(ISD::TRUNCATE,           MVT::v32i8, Custom);
 
       setOperationAction(ISD::SMAX,               MVT::v64i8, Legal);
       setOperationAction(ISD::SMAX,               MVT::v32i16, Legal);
@@ -1566,6 +1585,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::UMIN,               MVT::v64i8, Legal);
      setOperationAction(ISD::UMIN,               MVT::v32i16, Legal);
 
+      setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
+      setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
+      if (Subtarget->hasVLX())
+        setTruncStoreAction(MVT::v8i16,  MVT::v8i8, Legal);
+
       for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
         const MVT VT = (MVT::SimpleValueType)i;
@@ -12485,10 +12509,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
         Subtarget->hasDQI() && Subtarget->hasVLX())
       return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
   }
-  if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
-    if (VT.getVectorElementType().getSizeInBits() >=8)
-      return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
 
+  if (VT.getVectorElementType() == MVT::i1) {
     assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
     unsigned NumElts = InVT.getVectorNumElements();
     assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
@@ -12504,6 +12526,11 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
   }
 
+  // vpmovqb/w/d, vpmovdb/w, vpmovwb
+  if (((!InVT.is512BitVector() && Subtarget->hasVLX()) || InVT.is512BitVector()) &&
+      (InVT.getVectorElementType() != MVT::i16 || Subtarget->hasBWI()))
+    return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+
   if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
     // On AVX2, v4i64 -> v4i32 becomes VPERMD.
     if (Subtarget->hasInt256()) {
@@ -15220,7 +15247,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
 
 /// \brief Return (and \p Op, \p Mask) for compare instructions or
 /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
-/// necessary casting for \p Mask when lowering masking intrinsics.
+/// necessary casting or extending for \p Mask when lowering masking intrinsics
 static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
                                     SDValue PreservedSrc,
                                     const X86Subtarget *Subtarget,
@@ -15228,8 +15255,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
   EVT VT = Op.getValueType();
   EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
                                 MVT::i1, VT.getVectorNumElements());
-  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                   Mask.getValueType().getSizeInBits());
+  SDValue VMask = SDValue();
+  unsigned OpcodeSelect = ISD::VSELECT;
   SDLoc dl(Op);
 
   assert(MaskVT.isSimple() && "invalid mask type");
@@ -15237,11 +15264,20 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
   if (isAllOnes(Mask))
     return Op;
 
-  // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
-  // are extracted by EXTRACT_SUBVECTOR.
-  SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
-                              DAG.getBitcast(BitcastVT, Mask),
-                              DAG.getIntPtrConstant(0, dl));
+  if (MaskVT.bitsGT(Mask.getValueType())) {
+    EVT newMaskVT = EVT::getIntegerVT(*DAG.getContext(),
+                                      MaskVT.getSizeInBits());
+    VMask = DAG.getBitcast(MaskVT,
+                           DAG.getNode(ISD::ANY_EXTEND, dl, newMaskVT, Mask));
+  } else {
+    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                     Mask.getValueType().getSizeInBits());
+    // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
+    // are extracted by EXTRACT_SUBVECTOR.
+    VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+                        DAG.getBitcast(BitcastVT, Mask),
+                        DAG.getIntPtrConstant(0, dl));
+  }
 
   switch (Op.getOpcode()) {
   default: break;
@@ -15250,10 +15286,18 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
   case X86ISD::CMPM:
   case X86ISD::CMPMU:
     return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
+  case X86ISD::VTRUNC:
+  case X86ISD::VTRUNCS:
+  case X86ISD::VTRUNCUS:
+    // We can't use ISD::VSELECT here because it is not always "Legal"
+    // for the destination type. For example vpmovqb require only AVX512
+    // and vselect that can operate on byte element type require BWI
+    OpcodeSelect = X86ISD::SELECT;
+    break;
   }
   if (PreservedSrc.getOpcode() == ISD::UNDEF)
     PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
-  return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
+  return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
 }
 
 /// \brief Creates an SDNode for a predicated scalar operation.
@@ -16111,6 +16155,45 @@ static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
   return Chain;
 }
 
+/// \brief Lower intrinsics for TRUNCATE_TO_MEM case
+/// return truncate Store/MaskedStore Node
+static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op,
+                                              SelectionDAG &DAG,
+                                              MVT ElementType) {
+  SDLoc dl(Op);
+  SDValue Mask = Op.getOperand(4);
+  SDValue DataToTruncate = Op.getOperand(3);
+  SDValue Addr = Op.getOperand(2);
+  SDValue Chain = Op.getOperand(0);
+
+  EVT VT  = DataToTruncate.getValueType();
+  EVT SVT = EVT::getVectorVT(*DAG.getContext(),
+                             ElementType, VT.getVectorNumElements());
+
+  if (isAllOnes(Mask)) // return just a truncate store
+    return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr,
+                             MachinePointerInfo(), SVT, false, false,
+                             SVT.getScalarSizeInBits()/8);
+
+  EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                MVT::i1, VT.getVectorNumElements());
+  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                   Mask.getValueType().getSizeInBits());
+  // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
+  // are extracted by EXTRACT_SUBVECTOR.
+  SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+                              DAG.getBitcast(BitcastVT, Mask),
+                              DAG.getIntPtrConstant(0, dl));
+
+  MachineMemOperand *MMO = DAG.getMachineFunction().
+                           getMachineMemOperand(MachinePointerInfo(),
+                           MachineMemOperand::MOStore, SVT.getStoreSize(),
+                           SVT.getScalarSizeInBits()/8);
+
+  return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr,
+                            VMask, SVT, MMO, true);
+}
+
 static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
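A sketch of the two paths through LowerINTRINSIC_TRUNCATE_TO_MEM (illustrative function name and mask values; the declaration is the one added to the intrinsics table earlier in this patch):

declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8*, <8 x i64>, i8)

define void @store_trunc_qd(i8* %p, <8 x i64> %x, i8 %m) {
  ; all-ones mask: lowered to a plain truncating store
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %p, <8 x i64> %x, i8 -1)
  ; general mask: lowered to a truncating masked store
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %p, <8 x i64> %x, i8 %m)
  ret void
}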
@@ -16244,6 +16327,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
                         MachinePointerInfo(), false, false,
                         VT.getScalarSizeInBits()/8);
   }
+  case TRUNCATE_TO_MEM_VI8:
+    return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8);
+  case TRUNCATE_TO_MEM_VI16:
+    return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16);
+  case TRUNCATE_TO_MEM_VI32:
+    return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32);
   case EXPAND_FROM_MEM: {
     SDLoc dl(Op);
     SDValue Mask = Op.getOperand(4);
@@ -18954,7 +19043,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VZEXT:              return "X86ISD::VZEXT";
   case X86ISD::VSEXT:              return "X86ISD::VSEXT";
   case X86ISD::VTRUNC:             return "X86ISD::VTRUNC";
-  case X86ISD::VTRUNCM:            return "X86ISD::VTRUNCM";
+  case X86ISD::VTRUNCS:            return "X86ISD::VTRUNCS";
+  case X86ISD::VTRUNCUS:           return "X86ISD::VTRUNCUS";
   case X86ISD::VINSERT:            return "X86ISD::VINSERT";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
@@ -24093,6 +24183,15 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
   unsigned FromSz = VT.getVectorElementType().getSizeInBits();
   unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // The truncating store is legal in some cases. For example
+  // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
+  // are designated for truncate store.
+  // In this case we don't need any further transformations.
+  if (TLI.isTruncStoreLegal(VT, StVT))
+    return SDValue();
+
   // From, To sizes and ElemCount must be pow of two
   assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
     "Unexpected size for truncating masked store");
@@ -24204,6 +24303,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   unsigned FromSz = VT.getVectorElementType().getSizeInBits();
   unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
 
+  // The truncating store is legal in some cases. For example
+  // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
+  // are designated for truncate store.
+  // In this case we don't need any further transformations.
+  if (TLI.isTruncStoreLegal(VT, StVT))
+    return SDValue();
+
   // From, To sizes and ElemCount must be pow of two
   if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
   // We are going to use the original vector elt for storing.
@@ -282,9 +282,8 @@ namespace llvm {
 
       // Vector integer truncate.
       VTRUNC,
-
-      // Vector integer truncate with mask.
-      VTRUNCM,
+      // Vector integer truncate with unsigned/signed saturation.
+      VTRUNCUS, VTRUNCS,
 
       // Vector FP extend.
       VFPEXT,
@@ -5571,82 +5571,217 @@ defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W
 // Integer truncate and extend operations
 //-------------------------------------------------
 
-multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
-                            RegisterClass dstRC, RegisterClass srcRC,
-                            RegisterClass KRC, X86MemOperand x86memop> {
-  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
-               (ins srcRC:$src),
-               !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
-               []>, EVEX;
-  def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
-               (ins KRC:$mask, srcRC:$src),
-               !strconcat(OpcodeStr,
-                 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
-               []>, EVEX, EVEX_K;
-  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
-               (ins KRC:$mask, srcRC:$src),
-               !strconcat(OpcodeStr,
-                 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
-               []>, EVEX, EVEX_KZ;
-  def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
-               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-               []>, EVEX;
-  def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
-               (ins x86memop:$dst, KRC:$mask, srcRC:$src),
-               !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
-               []>, EVEX, EVEX_K;
-}
-defm VPMOVQB    : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVSQB   : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVUSQB  : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVQW    : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVSQW   : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVUSQW  : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVQD    : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
-                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVSQD   : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
-                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVUSQD  : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
-                                 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVDW    : avx512_trunc_sat<0x33, "vpmovdw",  VR256X, VR512, VK16WM,
-                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVSDW   : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
-                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVUSDW  : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
-                                 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVDB    : avx512_trunc_sat<0x31, "vpmovdb",  VR128X, VR512, VK16WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-defm VPMOVSDB   : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-defm VPMOVUSDB  : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
-                                 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-
-def : Pat<(v16i8  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQBrr VR512:$src)>;
-def : Pat<(v8i16  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQWrr VR512:$src)>;
-def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
-def : Pat<(v16i8  (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
-def : Pat<(v8i32  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQDrr VR512:$src)>;
-
-def : Pat<(v16i8  (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
-          (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
-def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
-          (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
-def : Pat<(v8i16  (X86vtruncm VK8WM:$mask,  (v8i64 VR512:$src))),
-          (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
-def : Pat<(v8i32  (X86vtruncm VK8WM:$mask,  (v8i64 VR512:$src))),
-          (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
+multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                               X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
+                               X86MemOperand x86memop> {
+
+  defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
+                      (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
+                      (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
+                      EVEX, T8XS;
+
+  // for intrinsic patter match
+  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+                          (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+                          undef)),
+            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
+             SrcInfo.RC:$src1)>;
+
+  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+                          (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+                          DestInfo.ImmAllZerosV)),
+            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
+             SrcInfo.RC:$src1)>;
+
+  def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+                          (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+                          DestInfo.RC:$src0)),
+            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0,
+             DestInfo.KRCWM:$mask ,
+             SrcInfo.RC:$src1)>;
+
+  let mayStore = 1 in {
+    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
+               (ins x86memop:$dst, SrcInfo.RC:$src),
+               OpcodeStr # "\t{$src, $dst |$dst, $src}",
+               []>, EVEX;
+
+    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
+               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+               OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+               []>, EVEX, EVEX_K;
+  }//mayStore = 1
+}
+
+multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
+                                    X86VectorVTInfo DestInfo,
+                                    PatFrag truncFrag, PatFrag mtruncFrag > {
+
+  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
+            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
+                                addr:$dst, SrcInfo.RC:$src)>;
+
+  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
+                        (SrcInfo.VT SrcInfo.RC:$src)),
+            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
+                                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
+}
+
+multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
+                                        X86VectorVTInfo DestInfo, string sat > {
+
+  def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
+                             DestInfo.Suffix#"_mem_"#SrcInfo.Size)
+                             addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
+           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr,
+                               (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
+                               (SrcInfo.VT SrcInfo.RC:$src))>;
+
+  def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
+                             DestInfo.Suffix#"_mem_"#SrcInfo.Size)
+                             addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
+           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr,
+                               (SrcInfo.VT SrcInfo.RC:$src))>;
+}
+
+multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                        AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
+                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
+                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
+                        X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
+                        Predicate prd = HasAVX512>{
+
+  let Predicates = [HasVLX, prd] in {
+    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
+                             DestInfoZ128, x86memopZ128>,
+                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
+                             truncFrag, mtruncFrag>, EVEX_V128;
+
+    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
+                             DestInfoZ256, x86memopZ256>,
+                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
+                             truncFrag, mtruncFrag>, EVEX_V256;
+  }
+  let Predicates = [prd] in
+    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
+                             DestInfoZ, x86memopZ>,
+                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
+                             truncFrag, mtruncFrag>, EVEX_V512;
+}
+
+multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                        AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
+                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
+                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
+                        X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{
+
+  let Predicates = [HasVLX, prd] in {
+    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
+                             DestInfoZ128, x86memopZ128>,
+                avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
+                             sat>, EVEX_V128;
+
+    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
+                             DestInfoZ256, x86memopZ256>,
+                avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
+                             sat>, EVEX_V256;
+  }
+  let Predicates = [prd] in
+    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
+                             DestInfoZ, x86memopZ>,
+                avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ,
+                             sat>, EVEX_V512;
+}
+
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+               v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
+               truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VO>;
+}
+multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
+  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode, avx512vl_i64_info,
+               v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
+               sat>, EVEX_CD8<8, CD8VO>;
+}
+
+multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+               v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
+               truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VQ>;
+}
+multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
+  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode, avx512vl_i64_info,
+               v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
+               sat>, EVEX_CD8<16, CD8VQ>;
+}
+
+multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+               v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
+               truncstorevi32, masked_truncstorevi32>, EVEX_CD8<32, CD8VH>;
+}
+multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
+  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode, avx512vl_i64_info,
+               v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
+               sat>, EVEX_CD8<32, CD8VH>;
+}
+
+multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+               v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
+               truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>;
+}
+multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
+  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode, avx512vl_i32_info,
+               v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
+               sat>, EVEX_CD8<8, CD8VQ>;
+}
+
+multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+               v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
+               truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VH>;
+}
+multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
+  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode, avx512vl_i32_info,
+               v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
+               sat>, EVEX_CD8<16, CD8VH>;
+}
+
+multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
+               v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
+               truncstorevi8, masked_truncstorevi8,HasBWI>, EVEX_CD8<16, CD8VH>;
+}
+multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
+  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode, avx512vl_i16_info,
+               v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
+               sat, HasBWI>, EVEX_CD8<16, CD8VH>;
+}
+
+defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   X86vtrunc>;
+defm VPMOVSQB   : avx512_trunc_sat_qb<0x22,  "s",  X86vtruncs>;
+defm VPMOVUSQB  : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>;
+
+defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   X86vtrunc>;
+defm VPMOVSQW   : avx512_trunc_sat_qw<0x24,  "s",  X86vtruncs>;
+defm VPMOVUSQW  : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>;
+
+defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   X86vtrunc>;
+defm VPMOVSQD   : avx512_trunc_sat_qd<0x25,  "s",  X86vtruncs>;
+defm VPMOVUSQD  : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>;
+
+defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb",   X86vtrunc>;
+defm VPMOVSDB   : avx512_trunc_sat_db<0x21,  "s",  X86vtruncs>;
+defm VPMOVUSDB  : avx512_trunc_sat_db<0x11, "us", X86vtruncus>;
+
+defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw",   X86vtrunc>;
+defm VPMOVSDW   : avx512_trunc_sat_dw<0x23,  "s",  X86vtruncs>;
+defm VPMOVUSDW  : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>;
+
+defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb",   X86vtrunc>;
+defm VPMOVSWB   : avx512_trunc_sat_wb<0x20,  "s",  X86vtruncs>;
+defm VPMOVUSWB  : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
 
 multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
@@ -114,19 +114,17 @@ def X86vsext : SDNode<"X86ISD::VSEXT",
                                           SDTCisInt<0>, SDTCisInt<1>,
                                           SDTCisOpSmallerThanOp<1, 0>]>>;
 
-def X86vtrunc   : SDNode<"X86ISD::VTRUNC",
-                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
-                                             SDTCisInt<0>, SDTCisInt<1>,
-                                             SDTCisOpSmallerThanOp<0, 1>]>>;
+def SDTVtrunc    : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                        SDTCisInt<0>, SDTCisInt<1>,
+                                        SDTCisOpSmallerThanOp<0, 1>]>;
+
+def X86vtrunc    : SDNode<"X86ISD::VTRUNC",   SDTVtrunc>;
+def X86vtruncs   : SDNode<"X86ISD::VTRUNCS",  SDTVtrunc>;
+def X86vtruncus  : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
+
 def X86trunc    : SDNode<"X86ISD::TRUNC",
                          SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>,
                                               SDTCisOpSmallerThanOp<0, 1>]>>;
-
-def X86vtruncm   : SDNode<"X86ISD::VTRUNCM",
-                        SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
-                                             SDTCisInt<0>, SDTCisInt<1>,
-                                             SDTCisVec<2>, SDTCisInt<2>,
-                                             SDTCisOpSmallerThanOp<0, 2>]>>;
 def X86vfpext  : SDNode<"X86ISD::VFPEXT",
                         SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                              SDTCisFP<0>, SDTCisFP<1>,
@ -21,10 +21,12 @@ enum IntrinsicType {
|
|||
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
|
||||
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
|
||||
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
|
||||
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
|
||||
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
|
||||
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
|
||||
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
|
||||
VPERM_3OP_MASKZ,
|
||||
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
|
||||
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
|
||||
EXPAND_FROM_MEM, BLEND
|
||||
};
|
||||
|
||||
|
@@ -138,6 +140,42 @@ static const IntrinsicData IntrinsicsWithChain[] = {
                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
  X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_512, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_128, TRUNCATE_TO_MEM_VI16,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_256, TRUNCATE_TO_MEM_VI16,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_512, TRUNCATE_TO_MEM_VI16,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_128, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_256, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_512, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_128, TRUNCATE_TO_MEM_VI32,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_256, TRUNCATE_TO_MEM_VI32,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_512, TRUNCATE_TO_MEM_VI32,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_128, TRUNCATE_TO_MEM_VI16,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_256, TRUNCATE_TO_MEM_VI16,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_512, TRUNCATE_TO_MEM_VI16,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_128, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_256, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
  X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
  X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
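The IntrinsicsWithChain table stays sorted by intrinsic number, which is why the new pmov_*_mem entries are spliced in alphabetically between the expand-load and scatter groups: the consumer binary-searches it. A sketch of that lookup in the file's own style; it is close to the existing helper, but treat the details as illustrative:

    // Hedged sketch: find a chained intrinsic's table entry via binary search.
    static const IntrinsicData *getIntrinsicWithChain(unsigned IntNo) {
      IntrinsicData Key = {IntNo, INTR_NO_TYPE, 0, 0};
      const IntrinsicData *Data =
          std::lower_bound(std::begin(IntrinsicsWithChain),
                           std::end(IntrinsicsWithChain), Key);
      if (Data != std::end(IntrinsicsWithChain) && *Data == Key)
        return Data;  // entry carries the IntrinsicType and X86ISD opcode
      return nullptr;
    }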
@@ -813,6 +851,114 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
  X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
  X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
  X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNC, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VTRUNCUS, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
                     X86ISD::PMULDQ, 0),
  X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
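All of these register-form entries use INTR_TYPE_1OP_MASK, so they share one lowering path: build the unmasked truncate node, then select between it and the pass-through operand under the mask. A hedged sketch of that shape; getVectorMaskingNode exists in X86ISelLowering.cpp of this vintage, but the exact operand positions here are assumptions:

    // Hedged sketch: generic INTR_TYPE_1OP_MASK lowering for the pmov family.
    case INTR_TYPE_1OP_MASK: {
      SDValue Src      = Op.getOperand(1);  // wide source vector
      SDValue PassThru = Op.getOperand(2);  // merge value for masked-off lanes
      SDValue Mask     = Op.getOperand(3);  // i8/i16/i32 lane mask
      // IntrData->Opc0 is X86ISD::VTRUNC / VTRUNCS / VTRUNCUS for these entries.
      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
                                  Mask, PassThru, Subtarget, DAG);
    }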
@@ -1,24 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX

; KNL-LABEL: trunc_16x32_to_16x8
; KNL: vpmovdb
; KNL: ret
define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
  %x = trunc <16 x i32> %i to <16 x i8>
  ret <16 x i8> %x
}

; KNL-LABEL: trunc_8x64_to_8x16
; KNL: vpmovqw
; KNL: ret
define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
  %x = trunc <8 x i64> %i to <8 x i16>
  ret <8 x i16> %x
}

;SKX-LABEL: zext_8x8mem_to_8x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
@@ -895,13 +878,6 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
  ret <8 x i32> %y
}

; KNL-LABEL: trunc_v16i32_to_v16i16
; KNL: vpmovdw
; KNL: ret
define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
  %1 = trunc <16 x i32> %x to <16 x i16>
  ret <16 x i16> %1
}

; KNL-LABEL: trunc_i32_to_i1
; KNL: movw $-4, %ax
@@ -3119,6 +3119,396 @@ define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16
  ret <16 x float> %res2
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
; CHECK: vpmovqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqb %zmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
; CHECK: vpmovqb %zmm0, (%rdi)
; CHECK: vpmovqb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
; CHECK: vpmovsqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
; CHECK: vpmovsqb %zmm0, (%rdi)
; CHECK: vpmovsqb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
; CHECK: vpmovusqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
; CHECK: vpmovusqb %zmm0, (%rdi)
; CHECK: vpmovusqb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
; CHECK: vpmovqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqw %zmm0, %xmm0
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
; CHECK: vpmovqw %zmm0, (%rdi)
; CHECK: vpmovqw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
; CHECK: vpmovsqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
; CHECK: vpmovsqw %zmm0, (%rdi)
; CHECK: vpmovsqw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
; CHECK: vpmovusqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
; CHECK: vpmovusqw %zmm0, (%rdi)
; CHECK: vpmovusqw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
; CHECK: vpmovqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovqd %zmm0, %ymm0
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
; CHECK: vpmovqd %zmm0, (%rdi)
; CHECK: vpmovqd %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
; CHECK: vpmovsqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
; CHECK: vpmovsqd %zmm0, (%rdi)
; CHECK: vpmovsqd %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
; CHECK: vpmovusqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
; CHECK: vpmovusqd %zmm0, (%rdi)
; CHECK: vpmovusqd %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
; CHECK: vpmovdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdb %zmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
; CHECK: vpmovdb %zmm0, (%rdi)
; CHECK: vpmovdb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
; CHECK: vpmovsdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
; CHECK: vpmovsdb %zmm0, (%rdi)
; CHECK: vpmovsdb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
; CHECK: vpmovusdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
; CHECK: vpmovusdb %zmm0, (%rdi)
; CHECK: vpmovusdb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
; CHECK: vpmovdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovdw %zmm0, %ymm0
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
; CHECK: vpmovdw %zmm0, (%rdi)
; CHECK: vpmovdw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
; CHECK: vpmovsdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
; CHECK: vpmovsdw %zmm0, (%rdi)
; CHECK: vpmovsdw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
; CHECK: vpmovusdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
; CHECK: vpmovusdw %zmm0, (%rdi)
; CHECK: vpmovusdw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}

declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
@@ -0,0 +1,364 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX

attributes #0 = { nounwind }

; KNL-LABEL: trunc_16x32_to_16x8
; KNL: vpmovdb
; KNL: ret
define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
  %x = trunc <16 x i32> %i to <16 x i8>
  ret <16 x i8> %x
}

; KNL-LABEL: trunc_8x64_to_8x16
; KNL: vpmovqw
; KNL: ret
define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
  %x = trunc <8 x i64> %i to <8 x i16>
  ret <8 x i16> %x
}

; KNL-LABEL: trunc_v16i32_to_v16i16
; KNL: vpmovdw
; KNL: ret
define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
  %1 = trunc <16 x i32> %x to <16 x i16>
  ret <16 x i16> %1
}

define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
; SKX-LABEL: trunc_qb_512:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqw %zmm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <8 x i64> %i to <8 x i8>
  ret <8 x i8> %x
}

define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
; SKX-LABEL: trunc_qb_512_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqb %zmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <8 x i64> %i to <8 x i8>
  store <8 x i8> %x, <8 x i8>* %res
  ret void
}

define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
; SKX-LABEL: trunc_qb_256:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqd %ymm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <4 x i64> %i to <4 x i8>
  ret <4 x i8> %x
}

define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
; SKX-LABEL: trunc_qb_256_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqb %ymm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <4 x i64> %i to <4 x i8>
  store <4 x i8> %x, <4 x i8>* %res
  ret void
}

define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
; SKX-LABEL: trunc_qb_128:
; SKX: ## BB#0:
; SKX-NEXT: retq
  %x = trunc <2 x i64> %i to <2 x i8>
  ret <2 x i8> %x
}

define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
; SKX-LABEL: trunc_qb_128_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqb %xmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <2 x i64> %i to <2 x i8>
  store <2 x i8> %x, <2 x i8>* %res
  ret void
}

define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
; SKX-LABEL: trunc_qw_512:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqw %zmm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <8 x i64> %i to <8 x i16>
  ret <8 x i16> %x
}

define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
; SKX-LABEL: trunc_qw_512_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqw %zmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <8 x i64> %i to <8 x i16>
  store <8 x i16> %x, <8 x i16>* %res
  ret void
}

define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
; SKX-LABEL: trunc_qw_256:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqd %ymm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <4 x i64> %i to <4 x i16>
  ret <4 x i16> %x
}

define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
; SKX-LABEL: trunc_qw_256_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqw %ymm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <4 x i64> %i to <4 x i16>
  store <4 x i16> %x, <4 x i16>* %res
  ret void
}

define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
; SKX-LABEL: trunc_qw_128:
; SKX: ## BB#0:
; SKX-NEXT: retq
  %x = trunc <2 x i64> %i to <2 x i16>
  ret <2 x i16> %x
}

define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
; SKX-LABEL: trunc_qw_128_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqw %xmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <2 x i64> %i to <2 x i16>
  store <2 x i16> %x, <2 x i16>* %res
  ret void
}

define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
; SKX-LABEL: trunc_qd_512:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqd %zmm0, %ymm0
; SKX-NEXT: retq
  %x = trunc <8 x i64> %i to <8 x i32>
  ret <8 x i32> %x
}

define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
; SKX-LABEL: trunc_qd_512_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqd %zmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <8 x i64> %i to <8 x i32>
  store <8 x i32> %x, <8 x i32>* %res
  ret void
}

define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
; SKX-LABEL: trunc_qd_256:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqd %ymm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <4 x i64> %i to <4 x i32>
  ret <4 x i32> %x
}

define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
; SKX-LABEL: trunc_qd_256_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqd %ymm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <4 x i64> %i to <4 x i32>
  store <4 x i32> %x, <4 x i32>* %res
  ret void
}

define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
; SKX-LABEL: trunc_qd_128:
; SKX: ## BB#0:
; SKX-NEXT: retq
  %x = trunc <2 x i64> %i to <2 x i32>
  ret <2 x i32> %x
}

define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
; SKX-LABEL: trunc_qd_128_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovqd %xmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <2 x i64> %i to <2 x i32>
  store <2 x i32> %x, <2 x i32>* %res
  ret void
}

define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
; SKX-LABEL: trunc_db_512:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdb %zmm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <16 x i32> %i to <16 x i8>
  ret <16 x i8> %x
}

define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
; SKX-LABEL: trunc_db_512_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdb %zmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <16 x i32> %i to <16 x i8>
  store <16 x i8> %x, <16 x i8>* %res
  ret void
}

define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
; SKX-LABEL: trunc_db_256:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdw %ymm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <8 x i32> %i to <8 x i8>
  ret <8 x i8> %x
}

define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
; SKX-LABEL: trunc_db_256_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdb %ymm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <8 x i32> %i to <8 x i8>
  store <8 x i8> %x, <8 x i8>* %res
  ret void
}

define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
; SKX-LABEL: trunc_db_128:
; SKX: ## BB#0:
; SKX-NEXT: retq
  %x = trunc <4 x i32> %i to <4 x i8>
  ret <4 x i8> %x
}

define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
; SKX-LABEL: trunc_db_128_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdb %xmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <4 x i32> %i to <4 x i8>
  store <4 x i8> %x, <4 x i8>* %res
  ret void
}

define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
; SKX-LABEL: trunc_dw_512:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdw %zmm0, %ymm0
; SKX-NEXT: retq
  %x = trunc <16 x i32> %i to <16 x i16>
  ret <16 x i16> %x
}

define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
; SKX-LABEL: trunc_dw_512_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdw %zmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <16 x i32> %i to <16 x i16>
  store <16 x i16> %x, <16 x i16>* %res
  ret void
}

define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
; SKX-LABEL: trunc_dw_256:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdw %ymm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <8 x i32> %i to <8 x i16>
  ret <8 x i16> %x
}

define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
; SKX-LABEL: trunc_dw_256_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdw %ymm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <8 x i32> %i to <8 x i16>
  store <8 x i16> %x, <8 x i16>* %res
  ret void
}

define <4 x i16> @trunc_dw_128(<4 x i32> %i) #0 {
; SKX-LABEL: trunc_dw_128:
; SKX: ## BB#0:
; SKX-NEXT: retq
  %x = trunc <4 x i32> %i to <4 x i16>
  ret <4 x i16> %x
}

define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
; SKX-LABEL: trunc_dw_128_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovdw %xmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <4 x i32> %i to <4 x i16>
  store <4 x i16> %x, <4 x i16>* %res
  ret void
}

define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
; SKX-LABEL: trunc_wb_512:
; SKX: ## BB#0:
; SKX-NEXT: vpmovwb %zmm0, %ymm0
; SKX-NEXT: retq
  %x = trunc <32 x i16> %i to <32 x i8>
  ret <32 x i8> %x
}

define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
; SKX-LABEL: trunc_wb_512_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <32 x i16> %i to <32 x i8>
  store <32 x i8> %x, <32 x i8>* %res
  ret void
}

define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
; SKX-LABEL: trunc_wb_256:
; SKX: ## BB#0:
; SKX-NEXT: vpmovwb %ymm0, %xmm0
; SKX-NEXT: retq
  %x = trunc <16 x i16> %i to <16 x i8>
  ret <16 x i8> %x
}

define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
; SKX-LABEL: trunc_wb_256_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovwb %ymm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <16 x i16> %i to <16 x i8>
  store <16 x i8> %x, <16 x i8>* %res
  ret void
}

define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
; SKX-LABEL: trunc_wb_128:
; SKX: ## BB#0:
; SKX-NEXT: retq
  %x = trunc <8 x i16> %i to <8 x i8>
  ret <8 x i8> %x
}

define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
; SKX-LABEL: trunc_wb_128_mem:
; SKX: ## BB#0:
; SKX-NEXT: vpmovwb %xmm0, (%rdi)
; SKX-NEXT: retq
  %x = trunc <8 x i16> %i to <8 x i8>
  store <8 x i8> %x, <8 x i8>* %res
  ret void
}
@@ -1008,6 +1008,84 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i
  ret <32 x i16> %res2
}

declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; CHECK: vpmovwb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovwb %zmm0, %ymm0
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)

define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; CHECK: vpmovwb %zmm0, (%rdi)
; CHECK: vpmovwb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; CHECK: vpmovswb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovswb %zmm0, %ymm0
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)

define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; CHECK: vpmovswb %zmm0, (%rdi)
; CHECK: vpmovswb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; CHECK: vpmovuswb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovuswb %zmm0, %ymm0
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)

define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; CHECK: vpmovuswb %zmm0, (%rdi)
; CHECK: vpmovuswb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
@@ -3876,6 +3876,162 @@ define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i
  ret <16 x i16> %res2
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
; CHECK: vpmovwb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovwb %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)

define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
; CHECK: vpmovwb %xmm0, (%rdi)
; CHECK: vpmovwb %xmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
; CHECK: vpmovswb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovswb %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)

define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
; CHECK: vpmovswb %xmm0, (%rdi)
; CHECK: vpmovswb %xmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
; CHECK: vpmovuswb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovuswb %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)

define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
; CHECK: vpmovuswb %xmm0, (%rdi)
; CHECK: vpmovuswb %xmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
; CHECK: vpmovwb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovwb %ymm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)

define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
; CHECK: vpmovwb %ymm0, (%rdi)
; CHECK: vpmovwb %ymm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
; CHECK: vpmovswb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovswb %ymm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)

define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
; CHECK: vpmovswb %ymm0, (%rdi)
; CHECK: vpmovswb %ymm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
  ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
; CHECK: vpmovuswb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovuswb %ymm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)

define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
; CHECK: vpmovuswb %ymm0, (%rdi)
; CHECK: vpmovuswb %ymm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
  ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
@ -3005,6 +3005,786 @@ define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x
|
|||
ret <8 x float> %res2
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
|
||||
; CHECK: vpmovqb %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpmovqb %xmm0, %xmm0
|
||||
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
|
||||
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
|
||||
%res3 = add <16 x i8> %res0, %res1
|
||||
%res4 = add <16 x i8> %res3, %res2
|
||||
ret <16 x i8> %res4
|
||||
}
|
||||
|
||||
declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8)
|
||||
|
||||
define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
|
||||
; CHECK: vpmovqb %xmm0, (%rdi)
|
||||
; CHECK: vpmovqb %xmm0, (%rdi) {%k1}
|
||||
call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
|
||||
call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
|
||||
ret void
|
||||
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
; CHECK: vpmovsqb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
; CHECK: vpmovsqb %xmm0, (%rdi)
; CHECK: vpmovsqb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
; CHECK: vpmovusqb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
; CHECK: vpmovusqb %xmm0, (%rdi)
; CHECK: vpmovusqb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}
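; Note: the three mnemonic families above differ only in how out-of-range
; source values are narrowed: vpmovqb truncates (keeps the low byte), vpmovsqb
; saturates as signed (clamps to [-128, 127]), and vpmovusqb saturates as
; unsigned (clamps to [0, 255]). The same trio repeats below for the qw, qd,
; db, and dw width combinations.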

declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
; CHECK: vpmovqb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}
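; Note: the qb intrinsics keep the full <16 x i8> result type even though a
; <2 x i64> source yields only 2 significant bytes and a <4 x i64> source only
; 4; the instruction zeroes the remaining destination bytes.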

declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
; CHECK: vpmovqb %ymm0, (%rdi)
; CHECK: vpmovqb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
; CHECK: vpmovsqb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
; CHECK: vpmovsqb %ymm0, (%rdi)
; CHECK: vpmovsqb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
; CHECK: vpmovusqb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
; CHECK: vpmovusqb %ymm0, (%rdi)
; CHECK: vpmovusqb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
; CHECK: vpmovqw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
; CHECK: vpmovqw %xmm0, (%rdi)
; CHECK: vpmovqw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
; CHECK: vpmovsqw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
; CHECK: vpmovsqw %xmm0, (%rdi)
; CHECK: vpmovsqw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
; CHECK: vpmovusqw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
; CHECK: vpmovusqw %xmm0, (%rdi)
; CHECK: vpmovusqw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
; CHECK: vpmovqw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
; CHECK: vpmovqw %ymm0, (%rdi)
; CHECK: vpmovqw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
; CHECK: vpmovsqw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
; CHECK: vpmovsqw %ymm0, (%rdi)
; CHECK: vpmovsqw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
; CHECK: vpmovusqw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
; CHECK: vpmovusqw %ymm0, (%rdi)
; CHECK: vpmovusqw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
; CHECK: vpmovqd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqd %xmm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
; CHECK: vpmovqd %xmm0, (%rdi)
; CHECK: vpmovqd %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
; CHECK: vpmovsqd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqd %xmm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
; CHECK: vpmovsqd %xmm0, (%rdi)
; CHECK: vpmovsqd %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
; CHECK: vpmovusqd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqd %xmm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
; CHECK: vpmovusqd %xmm0, (%rdi)
; CHECK: vpmovusqd %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
; CHECK: vpmovqd %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqd %ymm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
; CHECK: vpmovqd %ymm0, (%rdi)
; CHECK: vpmovqd %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
; CHECK: vpmovsqd %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqd %ymm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
; CHECK: vpmovsqd %ymm0, (%rdi)
; CHECK: vpmovsqd %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
; CHECK: vpmovusqd %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqd %ymm0, %xmm0
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
%res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
%res3 = add <4 x i32> %res0, %res1
%res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
; CHECK: vpmovusqd %ymm0, (%rdi)
; CHECK: vpmovusqd %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128:
; CHECK: vpmovdb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
; CHECK: vpmovdb %xmm0, (%rdi)
; CHECK: vpmovdb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
; CHECK: vpmovsdb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
; CHECK: vpmovsdb %xmm0, (%rdi)
; CHECK: vpmovsdb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
; CHECK: vpmovusdb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdb %xmm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
; CHECK: vpmovusdb %xmm0, (%rdi)
; CHECK: vpmovusdb %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256:
; CHECK: vpmovdb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
; CHECK: vpmovdb %ymm0, (%rdi)
; CHECK: vpmovdb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
; CHECK: vpmovsdb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
; CHECK: vpmovsdb %ymm0, (%rdi)
; CHECK: vpmovsdb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
; CHECK: vpmovusdb %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdb %ymm0, %xmm0
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
%res3 = add <16 x i8> %res0, %res1
%res4 = add <16 x i8> %res3, %res2
ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
; CHECK: vpmovusdb %ymm0, (%rdi)
; CHECK: vpmovusdb %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
; CHECK: vpmovdw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
; CHECK: vpmovdw %xmm0, (%rdi)
; CHECK: vpmovdw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
; CHECK: vpmovsdw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
; CHECK: vpmovsdw %xmm0, (%rdi)
; CHECK: vpmovsdw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
; CHECK: vpmovusdw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdw %xmm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
; CHECK: vpmovusdw %xmm0, (%rdi)
; CHECK: vpmovusdw %xmm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
; CHECK: vpmovdw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
; CHECK: vpmovdw %ymm0, (%rdi)
; CHECK: vpmovdw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
; CHECK: vpmovsdw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
; CHECK: vpmovsdw %ymm0, (%rdi)
; CHECK: vpmovsdw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
; CHECK: vpmovusdw %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdw %ymm0, %xmm0
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
%res3 = add <8 x i16> %res0, %res1
%res4 = add <8 x i16> %res3, %res2
ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
; CHECK: vpmovusdw %ymm0, (%rdi)
; CHECK: vpmovusdw %ymm0, (%rdi) {%k1}
call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
ret void
}

declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {

@ -190,10 +190,13 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; AVX2-LABEL: test15
; AVX2: vpmaskmovd

; SKX-LABEL: test15
; SKX: kshiftl
; SKX: kshiftr
; SKX: vmovdqu32 {{.*}}{%k1}
; SKX-LABEL: test15:
; SKX: ## BB#0:
; SKX-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
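; Note: with the new lowering, SKX turns this <2 x i32> masked store into a
; truncating vpmovqd store: vpcmpeqq materializes the trigger comparison in
; %k1 and vpmovqd writes only the selected 32-bit elements to memory.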
define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)

@ -3668,6 +3668,126 @@
// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0xc0,0xdf,0xff,0xff]
vpabsw -8256(%rdx), %zmm30

// CHECK: vpmovwb %zmm27, %ymm22
// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x30,0xde]
vpmovwb %zmm27, %ymm22

// CHECK: vpmovwb %zmm27, %ymm22 {%k1}
// CHECK: encoding: [0x62,0x22,0x7e,0x49,0x30,0xde]
vpmovwb %zmm27, %ymm22 {%k1}

// CHECK: vpmovwb %zmm27, %ymm22 {%k1} {z}
// CHECK: encoding: [0x62,0x22,0x7e,0xc9,0x30,0xde]
vpmovwb %zmm27, %ymm22 {%k1} {z}
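// Note: the masking state lives in the fourth EVEX byte: 0x48 (no mask)
// becomes 0x49 once the aaa bits select %k1, and zero-masking additionally
// sets the top bit of that byte, giving 0xc9 for {%k1} {z}.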

// CHECK: vpmovwb %zmm22, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x31]
vpmovwb %zmm22, (%rcx)

// CHECK: vpmovwb %zmm22, (%rcx) {%k4}
// CHECK: encoding: [0x62,0xe2,0x7e,0x4c,0x30,0x31]
vpmovwb %zmm22, (%rcx) {%k4}

// CHECK: vpmovwb %zmm22, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpmovwb %zmm22, 291(%rax,%r14,8)

// CHECK: vpmovwb %zmm22, 4064(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x7f]
vpmovwb %zmm22, 4064(%rdx)

// CHECK: vpmovwb %zmm22, 4096(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0x00,0x10,0x00,0x00]
vpmovwb %zmm22, 4096(%rdx)

// CHECK: vpmovwb %zmm22, -4096(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x80]
vpmovwb %zmm22, -4096(%rdx)

// CHECK: vpmovwb %zmm22, -4128(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0xe0,0xef,0xff,0xff]
vpmovwb %zmm22, -4128(%rdx)
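// Note: 4064 and -4096 fit the compressed disp8*N form (N = 32 for this
// 32-byte store, so one signed byte covers -4096..4064), while 4096 and
// -4128 fall outside that range and force a full 32-bit displacement.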

// CHECK: vpmovswb %zmm18, %ymm23
// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x20,0xd7]
vpmovswb %zmm18, %ymm23

// CHECK: vpmovswb %zmm18, %ymm23 {%k2}
// CHECK: encoding: [0x62,0xa2,0x7e,0x4a,0x20,0xd7]
vpmovswb %zmm18, %ymm23 {%k2}

// CHECK: vpmovswb %zmm18, %ymm23 {%k2} {z}
// CHECK: encoding: [0x62,0xa2,0x7e,0xca,0x20,0xd7]
vpmovswb %zmm18, %ymm23 {%k2} {z}

// CHECK: vpmovswb %zmm24, (%rcx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x01]
vpmovswb %zmm24, (%rcx)

// CHECK: vpmovswb %zmm24, (%rcx) {%k7}
// CHECK: encoding: [0x62,0x62,0x7e,0x4f,0x20,0x01]
vpmovswb %zmm24, (%rcx) {%k7}

// CHECK: vpmovswb %zmm24, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x20,0x84,0xf0,0x23,0x01,0x00,0x00]
vpmovswb %zmm24, 291(%rax,%r14,8)

// CHECK: vpmovswb %zmm24, 4064(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x7f]
vpmovswb %zmm24, 4064(%rdx)

// CHECK: vpmovswb %zmm24, 4096(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0x00,0x10,0x00,0x00]
vpmovswb %zmm24, 4096(%rdx)

// CHECK: vpmovswb %zmm24, -4096(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x80]
vpmovswb %zmm24, -4096(%rdx)

// CHECK: vpmovswb %zmm24, -4128(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0xe0,0xef,0xff,0xff]
vpmovswb %zmm24, -4128(%rdx)

// CHECK: vpmovuswb %zmm22, %ymm28
// CHECK: encoding: [0x62,0x82,0x7e,0x48,0x10,0xf4]
vpmovuswb %zmm22, %ymm28

// CHECK: vpmovuswb %zmm22, %ymm28 {%k3}
// CHECK: encoding: [0x62,0x82,0x7e,0x4b,0x10,0xf4]
vpmovuswb %zmm22, %ymm28 {%k3}

// CHECK: vpmovuswb %zmm22, %ymm28 {%k3} {z}
// CHECK: encoding: [0x62,0x82,0x7e,0xcb,0x10,0xf4]
vpmovuswb %zmm22, %ymm28 {%k3} {z}

// CHECK: vpmovuswb %zmm27, (%rcx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x19]
vpmovuswb %zmm27, (%rcx)

// CHECK: vpmovuswb %zmm27, (%rcx) {%k2}
// CHECK: encoding: [0x62,0x62,0x7e,0x4a,0x10,0x19]
vpmovuswb %zmm27, (%rcx) {%k2}

// CHECK: vpmovuswb %zmm27, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x10,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpmovuswb %zmm27, 291(%rax,%r14,8)

// CHECK: vpmovuswb %zmm27, 4064(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x7f]
vpmovuswb %zmm27, 4064(%rdx)

// CHECK: vpmovuswb %zmm27, 4096(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0x00,0x10,0x00,0x00]
vpmovuswb %zmm27, 4096(%rdx)

// CHECK: vpmovuswb %zmm27, -4096(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x80]
vpmovuswb %zmm27, -4096(%rdx)

// CHECK: vpmovuswb %zmm27, -4128(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0xe0,0xef,0xff,0xff]
vpmovuswb %zmm27, -4128(%rdx)

// CHECK: vpmulhuw %zmm21, %zmm24, %zmm21
// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xe4,0xed]
vpmulhuw %zmm21, %zmm24, %zmm21

@ -6583,6 +6583,486 @@
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff]
vpshufb -4128(%rdx), %ymm18, %ymm19

// CHECK: vpmovwb %xmm28, %xmm27
// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x30,0xe3]
vpmovwb %xmm28, %xmm27

// CHECK: vpmovwb %xmm28, %xmm27 {%k2}
// CHECK: encoding: [0x62,0x02,0x7e,0x0a,0x30,0xe3]
vpmovwb %xmm28, %xmm27 {%k2}

// CHECK: vpmovwb %xmm28, %xmm27 {%k2} {z}
// CHECK: encoding: [0x62,0x02,0x7e,0x8a,0x30,0xe3]
vpmovwb %xmm28, %xmm27 {%k2} {z}
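// Note: the 128-bit forms differ from the 512-bit ones above only in the
// EVEX L'L vector-length bits of the fourth byte (0x08/0x0a/0x8a here versus
// 0x48-style values); the %ymm-source forms below use 0x28-style values.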

// CHECK: vpmovwb %ymm26, %xmm26
// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x30,0xd2]
vpmovwb %ymm26, %xmm26

// CHECK: vpmovwb %ymm26, %xmm26 {%k4}
// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x30,0xd2]
vpmovwb %ymm26, %xmm26 {%k4}

// CHECK: vpmovwb %ymm26, %xmm26 {%k4} {z}
// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x30,0xd2]
vpmovwb %ymm26, %xmm26 {%k4} {z}

// CHECK: vpmovwb %xmm23, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x39]
vpmovwb %xmm23,(%rcx)

// CHECK: vpmovwb %xmm23, (%rcx) {%k6}
// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x30,0x39]
vpmovwb %xmm23,(%rcx) {%k6}

// CHECK: vpmovwb %xmm23, 4660(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xbc,0xf0,0x34,0x12,0x00,0x00]
vpmovwb %xmm23,4660(%rax,%r14,8)

// CHECK: vpmovwb %xmm23, 1016(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x7f]
vpmovwb %xmm23, 1016(%rdx)

// CHECK: vpmovwb %xmm23, 1024(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0x00,0x04,0x00,0x00]
vpmovwb %xmm23, 1024(%rdx)

// CHECK: vpmovwb %xmm23, -1024(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x80]
vpmovwb %xmm23,-1024(%rdx)

// CHECK: vpmovwb %xmm23, -1032(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0xf8,0xfb,0xff,0xff]
vpmovwb %xmm23,-1032(%rdx)

// CHECK: vpmovwb %ymm21, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x29]
vpmovwb %ymm21,(%rcx)

// CHECK: vpmovwb %ymm21, (%rcx) {%k5}
// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x30,0x29]
vpmovwb %ymm21,(%rcx) {%k5}

// CHECK: vpmovwb %ymm21, 4660(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xac,0xf0,0x34,0x12,0x00,0x00]
vpmovwb %ymm21, 4660(%rax,%r14,8)

// CHECK: vpmovwb %ymm21, 2032(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x7f]
vpmovwb %ymm21, 2032(%rdx)

// CHECK: vpmovwb %ymm21, 2048(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0x00,0x08,0x00,0x00]
vpmovwb %ymm21, 2048(%rdx)

// CHECK: vpmovwb %ymm21, -2048(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x80]
vpmovwb %ymm21,-2048(%rdx)

// CHECK: vpmovwb %ymm21, -2064(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0xf0,0xf7,0xff,0xff]
vpmovwb %ymm21, -2064(%rdx)
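// Note: the disp8 scale tracks the real memory footprint of the truncating
// store: N = 8 for an %xmm source (1016 and -1024 encode as disp8; 1024 and
// -1032 need disp32) and N = 16 for a %ymm source (2032/-2048 versus
// 2048/-2064), not the full source register width.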

// CHECK: vpmovswb %xmm19, %xmm17
// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0xd9]
vpmovswb %xmm19, %xmm17

// CHECK: vpmovswb %xmm19, %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x20,0xd9]
vpmovswb %xmm19, %xmm17 {%k1}

// CHECK: vpmovswb %xmm19, %xmm17 {%k1} {z}
// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x20,0xd9]
vpmovswb %xmm19, %xmm17 {%k1} {z}

// CHECK: vpmovswb %ymm19, %xmm21
// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xdd]
vpmovswb %ymm19, %xmm21

// CHECK: vpmovswb %ymm19, %xmm21 {%k4}
// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x20,0xdd]
vpmovswb %ymm19, %xmm21 {%k4}

// CHECK: vpmovswb %ymm19, %xmm21 {%k4} {z}
// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x20,0xdd]
vpmovswb %ymm19, %xmm21 {%k4} {z}

// CHECK: vpmovswb %xmm18, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x11]
vpmovswb %xmm18,(%rcx)

// CHECK: vpmovswb %xmm18, (%rcx) {%k2}
// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x20,0x11]
vpmovswb %xmm18,(%rcx) {%k2}

// CHECK: vpmovswb %xmm18, 4660(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0x94,0xf0,0x34,0x12,0x00,0x00]
vpmovswb %xmm18, 4660(%rax,%r14,8)

// CHECK: vpmovswb %xmm18, 1016(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x7f]
vpmovswb %xmm18, 1016(%rdx)

// CHECK: vpmovswb %xmm18, 1024(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0x00,0x04,0x00,0x00]
vpmovswb %xmm18, 1024(%rdx)

// CHECK: vpmovswb %xmm18, -1024(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x80]
vpmovswb %xmm18, -1024(%rdx)

// CHECK: vpmovswb %xmm18, -1032(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0xf8,0xfb,0xff,0xff]
vpmovswb %xmm18, -1032(%rdx)

// CHECK: vpmovswb %ymm23, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x39]
vpmovswb %ymm23,(%rcx)

// CHECK: vpmovswb %ymm23, (%rcx) {%k2}
// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x20,0x39]
vpmovswb %ymm23,(%rcx) {%k2}

// CHECK: vpmovswb %ymm23, 4660(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xbc,0xf0,0x34,0x12,0x00,0x00]
vpmovswb %ymm23, 4660(%rax,%r14,8)

// CHECK: vpmovswb %ymm23, 2032(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x7f]
vpmovswb %ymm23, 2032(%rdx)

// CHECK: vpmovswb %ymm23, 2048(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0x00,0x08,0x00,0x00]
vpmovswb %ymm23, 2048(%rdx)

// CHECK: vpmovswb %ymm23, -2048(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x80]
vpmovswb %ymm23, -2048(%rdx)

// CHECK: vpmovswb %ymm23, -2064(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0xf0,0xf7,0xff,0xff]
vpmovswb %ymm23, -2064(%rdx)

// CHECK: vpmovuswb %xmm17, %xmm26
// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x10,0xca]
vpmovuswb %xmm17, %xmm26

// CHECK: vpmovuswb %xmm17, %xmm26 {%k6}
// CHECK: encoding: [0x62,0x82,0x7e,0x0e,0x10,0xca]
vpmovuswb %xmm17, %xmm26 {%k6}

// CHECK: vpmovuswb %xmm17, %xmm26 {%k6} {z}
// CHECK: encoding: [0x62,0x82,0x7e,0x8e,0x10,0xca]
vpmovuswb %xmm17, %xmm26 {%k6} {z}

// CHECK: vpmovuswb %ymm26, %xmm17
// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xd1]
vpmovuswb %ymm26, %xmm17

// CHECK: vpmovuswb %ymm26, %xmm17 {%k2}
// CHECK: encoding: [0x62,0x22,0x7e,0x2a,0x10,0xd1]
vpmovuswb %ymm26, %xmm17 {%k2}

// CHECK: vpmovuswb %ymm26, %xmm17 {%k2} {z}
// CHECK: encoding: [0x62,0x22,0x7e,0xaa,0x10,0xd1]
vpmovuswb %ymm26, %xmm17 {%k2} {z}

// CHECK: vpmovuswb %xmm19, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x19]
vpmovuswb %xmm19,(%rcx)

// CHECK: vpmovuswb %xmm19, (%rcx) {%k1}
// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x10,0x19]
vpmovuswb %xmm19,(%rcx) {%k1}

// CHECK: vpmovuswb %xmm19, 4660(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0x9c,0xf0,0x34,0x12,0x00,0x00]
vpmovuswb %xmm19, 4660(%rax,%r14,8)

// CHECK: vpmovuswb %xmm19, 1016(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x7f]
vpmovuswb %xmm19, 1016(%rdx)

// CHECK: vpmovuswb %xmm19, 1024(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0x00,0x04,0x00,0x00]
vpmovuswb %xmm19, 1024(%rdx)

// CHECK: vpmovuswb %xmm19, -1024(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x80]
vpmovuswb %xmm19, -1024(%rdx)

// CHECK: vpmovuswb %xmm19, -1032(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0xf8,0xfb,0xff,0xff]
vpmovuswb %xmm19, -1032(%rdx)

// CHECK: vpmovuswb %ymm23, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x39]
vpmovuswb %ymm23,(%rcx)

// CHECK: vpmovuswb %ymm23, (%rcx) {%k6}
// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x10,0x39]
vpmovuswb %ymm23,(%rcx) {%k6}

// CHECK: vpmovuswb %ymm23, 4660(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x10,0xbc,0xf0,0x34,0x12,0x00,0x00]
vpmovuswb %ymm23, 4660(%rax,%r14,8)

// CHECK: vpmovuswb %ymm23, 2032(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x7f]
vpmovuswb %ymm23, 2032(%rdx)

// CHECK: vpmovuswb %ymm23, 2048(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0x00,0x08,0x00,0x00]
vpmovuswb %ymm23, 2048(%rdx)

// CHECK: vpmovuswb %ymm23, -2048(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x80]
vpmovuswb %ymm23, -2048(%rdx)

// CHECK: vpmovuswb %ymm23, -2064(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0xf0,0xf7,0xff,0xff]
vpmovuswb %ymm23, -2064(%rdx)

// CHECK: vpmovwb %xmm17, %xmm21
// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xcd]
vpmovwb %xmm17, %xmm21

// CHECK: vpmovwb %xmm17, %xmm21 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x30,0xcd]
vpmovwb %xmm17, %xmm21 {%k1}

// CHECK: vpmovwb %xmm17, %xmm21 {%k1} {z}
// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x30,0xcd]
vpmovwb %xmm17, %xmm21 {%k1} {z}

// CHECK: vpmovwb %ymm23, %xmm26
// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x30,0xfa]
vpmovwb %ymm23, %xmm26

// CHECK: vpmovwb %ymm23, %xmm26 {%k7}
// CHECK: encoding: [0x62,0x82,0x7e,0x2f,0x30,0xfa]
vpmovwb %ymm23, %xmm26 {%k7}

// CHECK: vpmovwb %ymm23, %xmm26 {%k7} {z}
// CHECK: encoding: [0x62,0x82,0x7e,0xaf,0x30,0xfa]
vpmovwb %ymm23, %xmm26 {%k7} {z}

// CHECK: vpmovwb %xmm21, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x29]
vpmovwb %xmm21, (%rcx)

// CHECK: vpmovwb %xmm21, (%rcx) {%k2}
// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x30,0x29]
vpmovwb %xmm21, (%rcx) {%k2}

// CHECK: vpmovwb %xmm21, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xac,0xf0,0x23,0x01,0x00,0x00]
vpmovwb %xmm21, 291(%rax,%r14,8)

// CHECK: vpmovwb %xmm21, 1016(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x7f]
vpmovwb %xmm21, 1016(%rdx)

// CHECK: vpmovwb %xmm21, 1024(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0x00,0x04,0x00,0x00]
vpmovwb %xmm21, 1024(%rdx)

// CHECK: vpmovwb %xmm21, -1024(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x80]
vpmovwb %xmm21, -1024(%rdx)

// CHECK: vpmovwb %xmm21, -1032(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0xf8,0xfb,0xff,0xff]
vpmovwb %xmm21, -1032(%rdx)

// CHECK: vpmovwb %ymm20, (%rcx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x21]
vpmovwb %ymm20, (%rcx)

// CHECK: vpmovwb %ymm20, (%rcx) {%k4}
// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x30,0x21]
vpmovwb %ymm20, (%rcx) {%k4}

// CHECK: vpmovwb %ymm20, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xa4,0xf0,0x23,0x01,0x00,0x00]
vpmovwb %ymm20, 291(%rax,%r14,8)

// CHECK: vpmovwb %ymm20, 2032(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x7f]
vpmovwb %ymm20, 2032(%rdx)

// CHECK: vpmovwb %ymm20, 2048(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0x00,0x08,0x00,0x00]
vpmovwb %ymm20, 2048(%rdx)

// CHECK: vpmovwb %ymm20, -2048(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x80]
vpmovwb %ymm20, -2048(%rdx)

// CHECK: vpmovwb %ymm20, -2064(%rdx)
// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0xf0,0xf7,0xff,0xff]
vpmovwb %ymm20, -2064(%rdx)

// CHECK: vpmovswb %xmm20, %xmm24
// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x20,0xe0]
vpmovswb %xmm20, %xmm24

// CHECK: vpmovswb %xmm20, %xmm24 {%k4}
// CHECK: encoding: [0x62,0x82,0x7e,0x0c,0x20,0xe0]
vpmovswb %xmm20, %xmm24 {%k4}

// CHECK: vpmovswb %xmm20, %xmm24 {%k4} {z}
// CHECK: encoding: [0x62,0x82,0x7e,0x8c,0x20,0xe0]
vpmovswb %xmm20, %xmm24 {%k4} {z}

// CHECK: vpmovswb %ymm18, %xmm27
// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x20,0xd3]
vpmovswb %ymm18, %xmm27

// CHECK: vpmovswb %ymm18, %xmm27 {%k1}
// CHECK: encoding: [0x62,0x82,0x7e,0x29,0x20,0xd3]
vpmovswb %ymm18, %xmm27 {%k1}

// CHECK: vpmovswb %ymm18, %xmm27 {%k1} {z}
// CHECK: encoding: [0x62,0x82,0x7e,0xa9,0x20,0xd3]
vpmovswb %ymm18, %xmm27 {%k1} {z}

// CHECK: vpmovswb %xmm24, (%rcx)
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x01]
vpmovswb %xmm24, (%rcx)

// CHECK: vpmovswb %xmm24, (%rcx) {%k3}
// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x20,0x01]
vpmovswb %xmm24, (%rcx) {%k3}

// CHECK: vpmovswb %xmm24, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x20,0x84,0xf0,0x23,0x01,0x00,0x00]
vpmovswb %xmm24, 291(%rax,%r14,8)

// CHECK: vpmovswb %xmm24, 1016(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x7f]
vpmovswb %xmm24, 1016(%rdx)

// CHECK: vpmovswb %xmm24, 1024(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0x00,0x04,0x00,0x00]
vpmovswb %xmm24, 1024(%rdx)

// CHECK: vpmovswb %xmm24, -1024(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x80]
vpmovswb %xmm24, -1024(%rdx)

// CHECK: vpmovswb %xmm24, -1032(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0xf8,0xfb,0xff,0xff]
vpmovswb %xmm24, -1032(%rdx)

// CHECK: vpmovswb %ymm27, (%rcx)
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x19]
vpmovswb %ymm27, (%rcx)

// CHECK: vpmovswb %ymm27, (%rcx) {%k7}
// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x20,0x19]
vpmovswb %ymm27, (%rcx) {%k7}

// CHECK: vpmovswb %ymm27, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpmovswb %ymm27, 291(%rax,%r14,8)

// CHECK: vpmovswb %ymm27, 2032(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x7f]
vpmovswb %ymm27, 2032(%rdx)

// CHECK: vpmovswb %ymm27, 2048(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0x00,0x08,0x00,0x00]
vpmovswb %ymm27, 2048(%rdx)

// CHECK: vpmovswb %ymm27, -2048(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x80]
vpmovswb %ymm27, -2048(%rdx)

// CHECK: vpmovswb %ymm27, -2064(%rdx)
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0xf0,0xf7,0xff,0xff]
vpmovswb %ymm27, -2064(%rdx)

// CHECK: vpmovuswb %xmm19, %xmm23
// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0xdf]
vpmovuswb %xmm19, %xmm23
|
||||
|
||||
// CHECK: vpmovuswb %xmm19, %xmm23 {%k4}
|
||||
// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x10,0xdf]
|
||||
vpmovuswb %xmm19, %xmm23 {%k4}
|
||||
|
||||
// CHECK: vpmovuswb %xmm19, %xmm23 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x10,0xdf]
|
||||
vpmovuswb %xmm19, %xmm23 {%k4} {z}
|
||||
|
||||
// CHECK: vpmovuswb %ymm23, %xmm28
|
||||
// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x10,0xfc]
|
||||
vpmovuswb %ymm23, %xmm28
|
||||
|
||||
// CHECK: vpmovuswb %ymm23, %xmm28 {%k6}
|
||||
// CHECK: encoding: [0x62,0x82,0x7e,0x2e,0x10,0xfc]
|
||||
vpmovuswb %ymm23, %xmm28 {%k6}
|
||||
|
||||
// CHECK: vpmovuswb %ymm23, %xmm28 {%k6} {z}
|
||||
// CHECK: encoding: [0x62,0x82,0x7e,0xae,0x10,0xfc]
|
||||
vpmovuswb %ymm23, %xmm28 {%k6} {z}
|
||||
|
||||
// CHECK: vpmovuswb %xmm25, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x09]
|
||||
vpmovuswb %xmm25, (%rcx)
|
||||
|
||||
// CHECK: vpmovuswb %xmm25, (%rcx) {%k3}
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x10,0x09]
|
||||
vpmovuswb %xmm25, (%rcx) {%k3}
|
||||
|
||||
// CHECK: vpmovuswb %xmm25, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpmovuswb %xmm25, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vpmovuswb %xmm25, 1016(%rdx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x7f]
|
||||
vpmovuswb %xmm25, 1016(%rdx)
|
||||
|
||||
// CHECK: vpmovuswb %xmm25, 1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0x00,0x04,0x00,0x00]
|
||||
vpmovuswb %xmm25, 1024(%rdx)
|
||||
|
||||
// CHECK: vpmovuswb %xmm25, -1024(%rdx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x80]
|
||||
vpmovuswb %xmm25, -1024(%rdx)
|
||||
|
||||
// CHECK: vpmovuswb %xmm25, -1032(%rdx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff]
|
||||
vpmovuswb %xmm25, -1032(%rdx)
|
||||
|
||||
// CHECK: vpmovuswb %ymm28, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x21]
|
||||
vpmovuswb %ymm28, (%rcx)
|
||||
|
||||
// CHECK: vpmovuswb %ymm28, (%rcx) {%k2}
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x10,0x21]
|
||||
vpmovuswb %ymm28, (%rcx) {%k2}
|
||||
|
||||
// CHECK: vpmovuswb %ymm28, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpmovuswb %ymm28, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vpmovuswb %ymm28, 2032(%rdx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x7f]
|
||||
vpmovuswb %ymm28, 2032(%rdx)
|
||||
|
||||
// CHECK: vpmovuswb %ymm28, 2048(%rdx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0x00,0x08,0x00,0x00]
|
||||
vpmovuswb %ymm28, 2048(%rdx)
|
||||
|
||||
// CHECK: vpmovuswb %ymm28, -2048(%rdx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x80]
|
||||
vpmovuswb %ymm28, -2048(%rdx)
|
||||
|
||||
// CHECK: vpmovuswb %ymm28, -2064(%rdx)
|
||||
// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0xf0,0xf7,0xff,0xff]
|
||||
vpmovuswb %ymm28, -2064(%rdx)
|
||||
|
||||
// CHECK: vpmulhuw %xmm18, %xmm21, %xmm24
|
||||
// CHECK: encoding: [0x62,0x21,0x55,0x00,0xe4,0xc2]
|
||||
vpmulhuw %xmm18, %xmm21, %xmm24
|
||||
|
|
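// A note on the displacement boundary cases above: EVEX encodings compress
// memory displacements as disp8*N, where N here matches the width of the
// half-size memory operand (8 bytes for the XMM-source forms, 16 bytes for
// the YMM-source forms of vpmovwb/vpmovswb/vpmovuswb). A displacement that
// is an exact multiple of N with a scaled value in [-128, 127] fits the
// one-byte form (2032 = 127*16 encodes as the single byte 0x7f), while
// anything else falls back to a four-byte disp32 (2048 = 128*16). The
// Python sketch below is a hypothetical illustration of that rule, not
// part of this patch; evex_disp_form and its return strings are invented
// names for exposition.

# Minimal sketch of the EVEX disp8*N compression rule, assuming N is the
# memory-operand width in bytes (8 for the XMM forms, 16 for the YMM forms
# above). Hypothetical helper for illustration only.
def evex_disp_form(disp, n):
    """Classify a displacement as compressed disp8 or full disp32."""
    if disp % n == 0 and -128 <= disp // n <= 127:
        return "disp8=0x%02x" % (disp // n & 0xFF)  # stored pre-scaled by N
    return "disp32"

# Boundary cases mirrored by the YMM tests above (N = 16):
assert evex_disp_form(2032, 16) == "disp8=0x7f"   # 127*16: still fits
assert evex_disp_form(2048, 16) == "disp32"       # 128*16: overflows disp8
assert evex_disp_form(-2048, 16) == "disp8=0x80"  # -128*16: still fits
assert evex_disp_form(-2064, 16) == "disp32"      # -129*16: overflows disp8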