[X86] AVX512FP16 instructions enabling 3/6

Enable FP16 conversion instructions.

Ref.: https://software.intel.com/content/www/us/en/develop/download/intel-avx512-fp16-architecture-specification.html

Reviewed By: LuoYuanke

Differential Revision: https://reviews.llvm.org/D105265
This commit is contained in: parent 94d0914292, commit 2379949aad
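For context, a minimal usage sketch of the conversion intrinsics this patch adds (an illustrative example, not part of the commit; it assumes a toolchain and CPU with AVX512FP16 enabled, e.g. -mavx512fp16):

#include <immintrin.h>

// vcvtph2dq: widen 16 x _Float16 to 16 x 32-bit signed int,
// rounding with the current rounding mode.
static __m512i fp16_to_epi32(__m256h h) {
  return _mm512_cvtph_epi32(h);
}

// vcvtdq2ph: narrow 16 x 32-bit signed int back to 16 x _Float16.
static __m256h epi32_to_fp16(__m512i v) {
  return _mm512_cvtepi32_ph(v);
}

Both intrinsics are defined in the header hunks below; masked (_mask/_maskz) and explicit-rounding (_round) variants are added alongside them.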
@@ -1876,6 +1876,84 @@ TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512f
TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16")

TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph128_mask, "V8xV2dV8xUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_mask, "V8xV4dV8xUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph512_mask, "V8xV8dV8xUcIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtph2pd128_mask, "V2dV8xV2dUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_mask, "V4dV8xV4dUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_round_mask, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_round_mask, "V8xV8xV4fV8xUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_round_mask, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_round_mask, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtw2ph128_mask, "V8xV8sV8xUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_mask, "V16xV16sV16xUs", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtw2ph512_mask, "V32xV32sV32xUiIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph128_mask, "V8xV8UsV8xUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_mask, "V16xV16UsV16xUs", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph512_mask, "V32xV32UsV32xUiIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph128_mask, "V8xV4iV8xUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_mask, "V8xV8iV8xUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph512_mask, "V16xV16iV16xUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph128_mask, "V8xV4UiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_mask, "V8xV8UiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph512_mask, "V16xV16UiV16xUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph128_mask, "V8xV2OiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_mask, "V8xV4OiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph512_mask, "V8xV8OiV8xUcIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph128_mask, "V8xV2UOiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_mask, "V8xV4UOiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph512_mask, "V8xV8UOiV8xUcIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtusi2sh, "V8xV8xUiIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtsi2sh, "V8xV8xiIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")

TARGET_BUILTIN(__builtin_ia32_vcvtph2psx128_mask, "V4fV8xV4fUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_mask, "V8fV8xV8fUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtph2psx512_mask, "V16fV16xV16fUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtps2phx128_mask, "V8xV4fV8xUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_mask, "V8xV8fV8xUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16")

// generic select intrinsics
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
@@ -92,6 +92,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dUOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fUOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtusi642sh, "V8xV8xUOiIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvtsi642sh, "V8xV8xOiIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")

// UINTR
@@ -12723,10 +12723,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);

  case X86::BI__builtin_ia32_vfmaddss3:
@@ -947,6 +947,996 @@ static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) {
  return __b[0];
}

#define _mm512_cvt_roundpd_ph(A, R) \
  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
      (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))

#define _mm512_mask_cvt_roundpd_ph(W, U, A, R) \
  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_ph(U, A, R) \
  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
      (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
      (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
      (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
      (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundph_pd(A, R) \
  ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
      (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R)))

#define _mm512_mask_cvt_roundph_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_pd(U, A, R) \
  ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
      (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))

static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) {
  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
      (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) {
  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
      (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
      (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_cvt_roundsh_ss(A, B, R) \
  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
      (__v4sf)_mm_undefined_ps(), \
      (__mmask8)(-1), (int)(R)))

#define _mm_mask_cvt_roundsh_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask( \
      (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsh_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
      __m128h __B) {
  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
      (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W,
      __mmask8 __U,
      __m128 __A,
      __m128h __B) {
  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
      (__v4sf)__W, (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U,
      __m128 __A,
      __m128h __B) {
  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
      (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_cvt_roundss_sh(A, B, R) \
  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
      (__v8hf)_mm_undefined_ph(), \
      (__mmask8)(-1), (int)(R)))

#define _mm_mask_cvt_roundss_sh(W, U, A, B, R) \
  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask( \
      (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sh(U, A, B, R) \
  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
      (__v8hf)_mm_setzero_ph(), \
      (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
      __m128 __B) {
  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
      (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W,
      __mmask8 __U,
      __m128h __A,
      __m128 __B) {
  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
      (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U,
      __m128h __A,
      __m128 __B) {
  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
      (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_cvt_roundsd_sh(A, B, R) \
  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
      (__v8hf)_mm_undefined_ph(), \
      (__mmask8)(-1), (int)(R)))

#define _mm_mask_cvt_roundsd_sh(W, U, A, B, R) \
  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask( \
      (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_sh(U, A, B, R) \
  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
      (__v8hf)_mm_setzero_ph(), \
      (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
      __m128d __B) {
  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
      (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W,
      __mmask8 __U,
      __m128h __A,
      __m128d __B) {
  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
      (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
      (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_cvt_roundsh_sd(A, B, R) \
  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
      (__v2df)_mm_undefined_pd(), \
      (__mmask8)(-1), (int)(R)))

#define _mm_mask_cvt_roundsh_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask( \
      (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsh_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
      __m128h __B) {
  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
      (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W,
      __mmask8 __U,
      __m128d __A,
      __m128h __B) {
  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
      (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
      (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundph_epi16(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
      (__v32hi)_mm512_undefined_epi32(), \
      (__mmask32)(-1), (int)(R)))

#define _mm512_mask_cvt_roundph_epi16(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W), \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_epi16(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
      (__v32hi)_mm512_setzero_epi32(), \
      (__mmask32)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtph_epi16(__m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
      (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundph_epi16(A, R) \
  ((__m512i)__builtin_ia32_vcvttph2w512_mask( \
      (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1), \
      (int)(R)))

#define _mm512_mask_cvtt_roundph_epi16(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W), \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundph_epi16(U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), \
      (__v32hi)_mm512_setzero_epi32(), \
      (__mmask32)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttph_epi16(__m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
      (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi16_ph(A, R) \
  ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), \
      (__v32hf)_mm512_undefined_ph(), \
      (__mmask32)(-1), (int)(R)))

#define _mm512_mask_cvt_roundepi16_ph(W, U, A, R) \
  ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi16_ph(U, A, R) \
  ((__m512h)__builtin_ia32_vcvtw2ph512_mask( \
      (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_cvtepi16_ph(__m512i __A) {
  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
      (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
      (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) {
  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
      (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundph_epu16(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2uw512_mask( \
      (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
      (int)(R)))

#define _mm512_mask_cvt_roundph_epu16(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_epu16(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \
      (__v32hu)_mm512_setzero_epi32(), \
      (__mmask32)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtph_epu16(__m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
      (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
      (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
      (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundph_epu16(A, R) \
  ((__m512i)__builtin_ia32_vcvttph2uw512_mask( \
      (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
      (int)(R)))

#define _mm512_mask_cvtt_roundph_epu16(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundph_epu16(U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \
      (__v32hu)_mm512_setzero_epi32(), \
      (__mmask32)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttph_epu16(__m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
      (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
      (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
      (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu16_ph(A, R) \
  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), \
      (__v32hf)_mm512_undefined_ph(), \
      (__mmask32)(-1), (int)(R)))

#define _mm512_mask_cvt_roundepu16_ph(W, U, A, R) \
  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu16_ph(U, A, R) \
  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask( \
      (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_cvtepu16_ph(__m512i __A) {
  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
      (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
      (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) {
  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
      (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundph_epi32(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2dq512_mask( \
      (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
      (int)(R)))

#define _mm512_mask_cvt_roundph_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), \
      (__v16si)_mm512_setzero_epi32(), \
      (__mmask16)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtph_epi32(__m256h __A) {
  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
      (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
      (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) {
  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
      (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundph_epu32(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2udq512_mask( \
      (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
      (int)(R)))

#define _mm512_mask_cvt_roundph_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), \
      (__v16su)_mm512_setzero_epi32(), \
      (__mmask16)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtph_epu32(__m256h __A) {
  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
      (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
      (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) {
  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
      (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi32_ph(A, R) \
  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), \
      (__v16hf)_mm256_undefined_ph(), \
      (__mmask16)(-1), (int)(R)))

#define _mm512_mask_cvt_roundepi32_ph(W, U, A, R) \
  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ph(U, A, R) \
  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask( \
      (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))

static __inline__ __m256h __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_ph(__m512i __A) {
  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
      (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
      (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) {
  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
      (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu32_ph(A, R) \
  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), \
      (__v16hf)_mm256_undefined_ph(), \
      (__mmask16)(-1), (int)(R)))

#define _mm512_mask_cvt_roundepu32_ph(W, U, A, R) \
  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ph(U, A, R) \
  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask( \
      (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))

static __inline__ __m256h __DEFAULT_FN_ATTRS512
_mm512_cvtepu32_ph(__m512i __A) {
  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
      (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
      (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) {
  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
      (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundph_epi32(A, R) \
  ((__m512i)__builtin_ia32_vcvttph2dq512_mask( \
      (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
      (int)(R)))

#define _mm512_mask_cvtt_roundph_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundph_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), \
      (__v16si)_mm512_setzero_epi32(), \
      (__mmask16)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttph_epi32(__m256h __A) {
  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
      (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
      (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) {
  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
      (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundph_epu32(A, R) \
  ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
      (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
      (int)(R)))

#define _mm512_mask_cvtt_roundph_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundph_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
      (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U), \
      (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttph_epu32(__m256h __A) {
  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
      (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
      (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) {
  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
      (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_ph(A, R) \
  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
      (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))

#define _mm512_mask_cvt_roundepi64_ph(W, U, A, R) \
  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi64_ph(U, A, R) \
  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
      (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_ph(__m512i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
      (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
      (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
      (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundph_epi64(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), \
      (__v8di)_mm512_undefined_epi32(), \
      (__mmask8)(-1), (int)(R)))

#define _mm512_mask_cvt_roundph_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2qq512_mask( \
      (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtph_epi64(__m128h __A) {
  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
      (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
      (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
      (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_ph(A, R) \
  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
      (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))

#define _mm512_mask_cvt_roundepu64_ph(W, U, A, R) \
  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu64_ph(U, A, R) \
  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
      (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_ph(__m512i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
      (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
      (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
      (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundph_epu64(A, R) \
  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
      (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
      (int)(R)))

#define _mm512_mask_cvt_roundph_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
      (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtph_epu64(__m128h __A) {
  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
      (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
      (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
      (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundph_epi64(A, R) \
  ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
      (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1), \
      (int)(R)))

#define _mm512_mask_cvtt_roundph_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundph_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
      (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttph_epi64(__m128h __A) {
  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
      (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
      (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
      (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundph_epu64(A, R) \
  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
      (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
      (int)(R)))

#define _mm512_mask_cvtt_roundph_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundph_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
      (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttph_epu64(__m128h __A) {
  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
      (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
      (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
      (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_cvt_roundsh_i32(A, R) \
  ((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R)))

static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) {
  return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
}

#define _mm_cvt_roundsh_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R)))

static __inline__ unsigned int __DEFAULT_FN_ATTRS128
_mm_cvtsh_u32(__m128h __A) {
  return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A,
      _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvt_roundsh_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R)))

static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) {
  return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_cvt_roundsh_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R)))

static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvtsh_u64(__m128h __A) {
  return (unsigned long long)__builtin_ia32_vcvtsh2usi64(
      (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
}
#endif // __x86_64__

#define _mm_cvt_roundu32_sh(A, B, R) \
  ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_cvtu32_sh(__m128h __A, unsigned int __B) {
  __A[0] = __B;
  return __A;
}

#ifdef __x86_64__
#define _mm_cvt_roundu64_sh(A, B, R) \
  ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B), \
      (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
  __A[0] = __B;
  return __A;
}
#endif

#define _mm_cvt_roundi32_sh(A, B, R) \
  ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A,
      int __B) {
  __A[0] = __B;
  return __A;
}

#ifdef __x86_64__
#define _mm_cvt_roundi64_sh(A, B, R) \
  ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A,
      long long __B) {
  __A[0] = __B;
  return __A;
}
#endif

#define _mm_cvtt_roundsh_i32(A, R) \
  ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R)))

static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) {
  return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A,
      _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundsh_i64(A, R) \
  ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R)))

static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) {
  return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A,
      _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvtt_roundsh_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R)))

static __inline__ unsigned int __DEFAULT_FN_ATTRS128
_mm_cvttsh_u32(__m128h __A) {
  return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A,
      _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundsh_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R)))

static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvttsh_u64(__m128h __A) {
  return (unsigned long long)__builtin_ia32_vcvttsh2usi64(
      (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm512_cvtx_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)(-1), (int)(R)))

#define _mm512_mask_cvtx_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtx_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2psx512_mask( \
      (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))

static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) {
  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
      (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) {
  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
      (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) {
  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
      (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtx_roundps_ph(A, R) \
  ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), \
      (__v16hf)_mm256_undefined_ph(), \
      (__mmask16)(-1), (int)(R)))

#define _mm512_mask_cvtx_roundps_ph(W, U, A, R) \
  ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtx_roundps_ph(U, A, R) \
  ((__m256h)__builtin_ia32_vcvtps2phx512_mask( \
      (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))

static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) {
  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
      (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) {
  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
      (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
      (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ _Float16 __DEFAULT_FN_ATTRS512
_mm512_reduce_add_ph(__m512h __W) {
  return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W);
@ -327,6 +327,772 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) {
|
|||
((__mmask8)__builtin_ia32_cmpph128_mask( \
|
||||
(__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
|
||||
return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
|
||||
(__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
|
||||
__mmask8 __U,
|
||||
__m128d __A) {
|
||||
return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
|
||||
(__mmask8)__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
|
||||
return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
|
||||
(__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
|
||||
return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
|
||||
(__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
|
||||
return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
|
||||
(__mmask8)__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
|
||||
return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
|
||||
(__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
|
||||
return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
|
||||
(__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
|
||||
__mmask8 __U,
|
||||
__m128h __A) {
|
||||
return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
|
||||
(__mmask8)__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
|
||||
return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
|
||||
(__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
|
||||
return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
|
||||
(__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
|
||||
return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
|
||||
(__mmask8)__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
|
||||
return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
|
||||
(__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2w128_mask(
      (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
                                                  (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2w128_mask(
      (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtph_epi16(__m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2w256_mask(
      (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
                                                  (__mmask16)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2w256_mask(
      (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2w128_mask(
      (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2w128_mask(
      (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttph_epi16(__m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2w256_mask(
      (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
                                                   (__mmask16)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2w256_mask(
      (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
  return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_cvtepi16_ph(__m256i __A) {
  return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
                                              (__v16hf)_mm256_cvtepi16_ph(__A),
                                              (__v16hf)_mm256_setzero_ph());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
      (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
      (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtph_epu16(__m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
      (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
                                                   (__mmask16)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
      (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
      (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
      (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttph_epu16(__m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
      (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
                                                    (__mmask16)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
      (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
  return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_cvtepu16_ph(__m256i __A) {
  return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
}

static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
                                              (__v16hf)_mm256_cvtepu16_ph(__A),
                                              (__v16hf)_mm256_setzero_ph());
}
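// Editor's note, not part of the patch: the plain cvt forms above use the
// current MXCSR rounding mode, while the cvtt forms always truncate toward
// zero. A small sketch with the word-sized conversions (illustrative names;
// compile with -mavx512fp16 -mavx512vl):
#include <immintrin.h>

void rounding_vs_truncation(__m128h h, __m128i *rounded, __m128i *truncated) {
  *rounded = _mm_cvtph_epi16(h);    // vcvtph2w: e.g. 2.5 -> 2 under the
                                    // default round-to-nearest-even mode
  *truncated = _mm_cvttph_epi16(h); // vcvttph2w: 2.9 -> 2, -2.9 -> -2
}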
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
      (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
      (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtph_epi32(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
      (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
      (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
      (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
      (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtph_epu32(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
      (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
      (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
      (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
      (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_cvtepi32_ph(__m256i __A) {
  return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
      (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
      (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_cvtepu32_ph(__m256i __A) {
  return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
      (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
      (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttph_epi32(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
      (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
      (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
      (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
                                                     (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
      (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttph_epu32(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
      (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
                                                     (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
      (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
}
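// Editor's note, not part of the patch: as with the 16-bit case, the
// int32->half direction at 256 bits needs no target builtin; the header
// open-codes it as a generic vector convert plus a mask-driven lane select.
// A sketch of how the two pieces relate (illustrative names):
#include <immintrin.h>

__m128h epi32_to_ph_demo(__m128h src, __mmask8 k, __m256i v) {
  __m128h full = _mm256_cvtepi32_ph(v); // lowers to sitofp <8 x i32> to <8 x half>
  (void)full;
  // The masked form is the same convert followed by a select against src.
  return _mm256_mask_cvtepi32_ph(src, k, v);
}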
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
      (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
      (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_cvtepi64_ph(__m256i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
      (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
      (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
      (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
      (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtph_epi64(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
      (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
      (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
      (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
      (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_cvtepu64_ph(__m256i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
      (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
      (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
      (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
      (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtph_epu64(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
      (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
      (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
      (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
      (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttph_epi64(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
      (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
      (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
      (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
                                                     (__mmask8)__U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
      (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttph_epu64(__m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
      (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
                                                     (__mmask8)__U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
      (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
}
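// Editor's note, not part of the patch: in the qword group above, a __m128h
// carries eight half slots but the 128-bit qq conversions only populate or
// consume two of them (four in the 256-bit forms); as I read the spec, the
// unused result slots are zeroed. A sketch (illustrative names):
#include <immintrin.h>

__m128i epi64_roundtrip_demo(__m128i q) {
  __m128h two_halves = _mm_cvtepi64_ph(q); // 2 x i64 -> low 2 x half (lossy
                                           // for values outside half range)
  return _mm_cvtph_epi64(two_halves);      // low 2 x half -> 2 x i64
}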
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
  return (__m128)__builtin_ia32_vcvtph2psx128_mask(
      (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
                                                                  __mmask8 __U,
                                                                  __m128h __A) {
  return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
  return (__m128)__builtin_ia32_vcvtph2psx128_mask(
      (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
  return (__m256)__builtin_ia32_vcvtph2psx256_mask(
      (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
  return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
                                                   (__mmask8)__U);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
  return (__m256)__builtin_ia32_vcvtph2psx256_mask(
      (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
      (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
                                                                   __mmask8 __U,
                                                                   __m128 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
      (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
      (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
                                                    (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
  return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
      (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}
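// Editor's note, not part of the patch: the x infix in cvtxph_ps/cvtxps_ph
// (VCVTPH2PSX/VCVTPS2PHX) keeps these names from colliding with the legacy
// F16C _mm_cvtph_ps/_mm_cvtps_ph, which predate masking and, in the ps->ph
// direction, take an immediate rounding control. Side by side (assuming
// F16C is also enabled):
#include <immintrin.h>

__m128h new_form(__m128 v) { return _mm_cvtxps_ph(v); } // typed __m128h result

__m128i old_form(__m128 v) { // legacy form packs halves into a __m128i
  return _mm_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT);
}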
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
                                                                  __m128h __A,
                                                                  __m128h __W) {
@@ -3878,6 +3878,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_vcvttss2si64:
  case X86::BI__builtin_ia32_vcvttss2usi32:
  case X86::BI__builtin_ia32_vcvttss2usi64:
  case X86::BI__builtin_ia32_vcvttsh2si32:
  case X86::BI__builtin_ia32_vcvttsh2si64:
  case X86::BI__builtin_ia32_vcvttsh2usi32:
  case X86::BI__builtin_ia32_vcvttsh2usi64:
    ArgNum = 1;
    break;
  case X86::BI__builtin_ia32_maxpd512:
@@ -3888,6 +3892,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_minph512:
    ArgNum = 2;
    break;
  case X86::BI__builtin_ia32_vcvtph2pd512_mask:
  case X86::BI__builtin_ia32_vcvtph2psx512_mask:
  case X86::BI__builtin_ia32_cvtps2pd512_mask:
  case X86::BI__builtin_ia32_cvttpd2dq512_mask:
  case X86::BI__builtin_ia32_cvttpd2qq512_mask:
@@ -3897,6 +3903,12 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_cvttps2qq512_mask:
  case X86::BI__builtin_ia32_cvttps2udq512_mask:
  case X86::BI__builtin_ia32_cvttps2uqq512_mask:
  case X86::BI__builtin_ia32_vcvttph2w512_mask:
  case X86::BI__builtin_ia32_vcvttph2uw512_mask:
  case X86::BI__builtin_ia32_vcvttph2dq512_mask:
  case X86::BI__builtin_ia32_vcvttph2udq512_mask:
  case X86::BI__builtin_ia32_vcvttph2qq512_mask:
  case X86::BI__builtin_ia32_vcvttph2uqq512_mask:
  case X86::BI__builtin_ia32_exp2pd_mask:
  case X86::BI__builtin_ia32_exp2ps_mask:
  case X86::BI__builtin_ia32_getexppd512_mask:
@@ -3916,6 +3928,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_cmpsd_mask:
  case X86::BI__builtin_ia32_cmpss_mask:
  case X86::BI__builtin_ia32_cmpsh_mask:
  case X86::BI__builtin_ia32_vcvtsh2sd_round_mask:
  case X86::BI__builtin_ia32_vcvtsh2ss_round_mask:
  case X86::BI__builtin_ia32_cvtss2sd_round_mask:
  case X86::BI__builtin_ia32_getexpsd128_round_mask:
  case X86::BI__builtin_ia32_getexpss128_round_mask:
@@ -3965,6 +3979,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_vcvtss2si64:
  case X86::BI__builtin_ia32_vcvtss2usi32:
  case X86::BI__builtin_ia32_vcvtss2usi64:
  case X86::BI__builtin_ia32_vcvtsh2si32:
  case X86::BI__builtin_ia32_vcvtsh2si64:
  case X86::BI__builtin_ia32_vcvtsh2usi32:
  case X86::BI__builtin_ia32_vcvtsh2usi64:
  case X86::BI__builtin_ia32_sqrtpd512:
  case X86::BI__builtin_ia32_sqrtps512:
    ArgNum = 1;
@@ -3988,11 +4006,17 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_cvtusi2sd64:
  case X86::BI__builtin_ia32_cvtusi2ss32:
  case X86::BI__builtin_ia32_cvtusi2ss64:
  case X86::BI__builtin_ia32_vcvtusi2sh:
  case X86::BI__builtin_ia32_vcvtusi642sh:
  case X86::BI__builtin_ia32_vcvtsi2sh:
  case X86::BI__builtin_ia32_vcvtsi642sh:
    ArgNum = 2;
    HasRC = true;
    break;
  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
  case X86::BI__builtin_ia32_vcvtpd2ph512_mask:
  case X86::BI__builtin_ia32_vcvtps2phx512_mask:
  case X86::BI__builtin_ia32_cvtpd2ps512_mask:
  case X86::BI__builtin_ia32_cvtpd2dq512_mask:
  case X86::BI__builtin_ia32_cvtpd2qq512_mask:
@@ -4006,6 +4030,18 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
  case X86::BI__builtin_ia32_vcvtph2w512_mask:
  case X86::BI__builtin_ia32_vcvtph2uw512_mask:
  case X86::BI__builtin_ia32_vcvtph2dq512_mask:
  case X86::BI__builtin_ia32_vcvtph2udq512_mask:
  case X86::BI__builtin_ia32_vcvtph2qq512_mask:
  case X86::BI__builtin_ia32_vcvtph2uqq512_mask:
  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
    ArgNum = 3;
    HasRC = true;
    break;
@@ -4026,6 +4062,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
  case X86::BI__builtin_ia32_scalefsd_round_mask:
  case X86::BI__builtin_ia32_scalefss_round_mask:
  case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
  case X86::BI__builtin_ia32_vcvtss2sh_round_mask:
  case X86::BI__builtin_ia32_vcvtsd2sh_round_mask:
  case X86::BI__builtin_ia32_sqrtsd_round_mask:
  case X86::BI__builtin_ia32_sqrtss_round_mask:
  case X86::BI__builtin_ia32_vfmaddsd3_mask:
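// Editor's note, not part of the patch: these Sema additions gate the
// rounding/SAE immediate on the new conversions. A hedged illustration of
// what the check accepts; _mm512_cvt_roundpd_ph is assumed here from the
// 512-bit header added elsewhere in this commit:
#include <immintrin.h>

__m128h round_imm_demo(__m512d v) {
  // Accepted: a valid rounding immediate combined with no-exception.
  // A malformed immediate would be rejected by CheckX86BuiltinRoundingOrSAE.
  return _mm512_cvt_roundpd_ph(v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}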
File diff suppressed because it is too large
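// Editor's note, not part of the patch: the test chunk below pairs each
// intrinsic with the IR it must produce. The file's RUN line sits outside
// this excerpt; it presumably follows the usual pattern for these CodeGen
// tests, something like:
//
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-unknown \
// RUN:   -target-feature +avx512fp16 -target-feature +avx512vl \
// RUN:   -emit-llvm -o - -Wall -Werror | FileCheck %s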
@@ -1215,6 +1215,798 @@ __mmask8 test_mm_mask_cmp_ph_mask_true_us(__mmask8 m, __m128h a, __m128h b) {
  return _mm_mask_cmp_ph_mask(m, a, b, _CMP_TRUE_US);
}

__m128h test_mm_cvtpd_ph(__m128d A) {
  // CHECK-LABEL: test_mm_cvtpd_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
  return _mm_cvtpd_ph(A);
}

__m128h test_mm_mask_cvtpd_ph(__m128h A, __mmask8 B, __m128d C) {
  // CHECK-LABEL: test_mm_mask_cvtpd_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
  return _mm_mask_cvtpd_ph(A, B, C);
}

__m128h test_mm_maskz_cvtpd_ph(__mmask8 A, __m128d B) {
  // CHECK-LABEL: test_mm_maskz_cvtpd_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
  return _mm_maskz_cvtpd_ph(A, B);
}

__m128h test_mm256_cvtpd_ph(__m256d A) {
  // CHECK-LABEL: test_mm256_cvtpd_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
  return _mm256_cvtpd_ph(A);
}

__m128h test_mm256_mask_cvtpd_ph(__m128h A, __mmask8 B, __m256d C) {
  // CHECK-LABEL: test_mm256_mask_cvtpd_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
  return _mm256_mask_cvtpd_ph(A, B, C);
}

__m128h test_mm256_maskz_cvtpd_ph(__mmask8 A, __m256d B) {
  // CHECK-LABEL: test_mm256_maskz_cvtpd_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
  return _mm256_maskz_cvtpd_ph(A, B);
}

__m128d test_mm_cvtph_pd(__m128h A) {
  // CHECK-LABEL: test_mm_cvtph_pd
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
  return _mm_cvtph_pd(A);
}

__m128d test_mm_mask_cvtph_pd(__m128d A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvtph_pd
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
  return _mm_mask_cvtph_pd(A, B, C);
}

__m128d test_mm_maskz_cvtph_pd(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvtph_pd
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
  return _mm_maskz_cvtph_pd(A, B);
}

__m256d test_mm256_cvtph_pd(__m128h A) {
  // CHECK-LABEL: test_mm256_cvtph_pd
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
  return _mm256_cvtph_pd(A);
}

__m256d test_mm256_mask_cvtph_pd(__m256d A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm256_mask_cvtph_pd
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
  return _mm256_mask_cvtph_pd(A, B, C);
}

__m256d test_mm256_maskz_cvtph_pd(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm256_maskz_cvtph_pd
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
  return _mm256_maskz_cvtph_pd(A, B);
}

__m128i test_mm_cvtph_epi16(__m128h A) {
  // CHECK-LABEL: test_mm_cvtph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
  return _mm_cvtph_epi16(A);
}

__m128i test_mm_mask_cvtph_epi16(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvtph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
  return _mm_mask_cvtph_epi16(A, B, C);
}

__m128i test_mm_maskz_cvtph_epi16(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvtph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
  return _mm_maskz_cvtph_epi16(A, B);
}

__m256i test_mm256_cvtph_epi16(__m256h A) {
  // CHECK-LABEL: test_mm256_cvtph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
  return _mm256_cvtph_epi16(A);
}

__m256i test_mm256_mask_cvtph_epi16(__m256i A, __mmask16 B, __m256h C) {
  // CHECK-LABEL: test_mm256_mask_cvtph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
  return _mm256_mask_cvtph_epi16(A, B, C);
}

__m256i test_mm256_maskz_cvtph_epi16(__mmask16 A, __m256h B) {
  // CHECK-LABEL: test_mm256_maskz_cvtph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
  return _mm256_maskz_cvtph_epi16(A, B);
}

__m128i test_mm_cvttph_epi16(__m128h A) {
  // CHECK-LABEL: test_mm_cvttph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
  return _mm_cvttph_epi16(A);
}

__m128i test_mm_mask_cvttph_epi16(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvttph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
  return _mm_mask_cvttph_epi16(A, B, C);
}

__m128i test_mm_maskz_cvttph_epi16(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvttph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
  return _mm_maskz_cvttph_epi16(A, B);
}

__m256i test_mm256_cvttph_epi16(__m256h A) {
  // CHECK-LABEL: test_mm256_cvttph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
  return _mm256_cvttph_epi16(A);
}

__m256i test_mm256_mask_cvttph_epi16(__m256i A, __mmask16 B, __m256h C) {
  // CHECK-LABEL: test_mm256_mask_cvttph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
  return _mm256_mask_cvttph_epi16(A, B, C);
}

__m256i test_mm256_maskz_cvttph_epi16(__mmask16 A, __m256h B) {
  // CHECK-LABEL: test_mm256_maskz_cvttph_epi16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
  return _mm256_maskz_cvttph_epi16(A, B);
}

__m128h test_mm_cvtepi16_ph(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi16_ph
  // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
  return _mm_cvtepi16_ph(A);
}

__m128h test_mm_mask_cvtepi16_ph(__m128h A, __mmask8 B, __m128i C) {
  // CHECK-LABEL: test_mm_mask_cvtepi16_ph
  // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
  return _mm_mask_cvtepi16_ph(A, B, C);
}

__m128h test_mm_maskz_cvtepi16_ph(__mmask8 A, __m128i B) {
  // CHECK-LABEL: test_mm_maskz_cvtepi16_ph
  // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
  return _mm_maskz_cvtepi16_ph(A, B);
}

__m256h test_mm256_cvtepi16_ph(__m256i A) {
  // CHECK-LABEL: test_mm256_cvtepi16_ph
  // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
  return _mm256_cvtepi16_ph(A);
}

__m256h test_mm256_mask_cvtepi16_ph(__m256h A, __mmask16 B, __m256i C) {
  // CHECK-LABEL: test_mm256_mask_cvtepi16_ph
  // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
  return _mm256_mask_cvtepi16_ph(A, B, C);
}

__m256h test_mm256_maskz_cvtepi16_ph(__mmask16 A, __m256i B) {
  // CHECK-LABEL: test_mm256_maskz_cvtepi16_ph
  // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
  return _mm256_maskz_cvtepi16_ph(A, B);
}

__m128i test_mm_cvtph_epu16(__m128h A) {
  // CHECK-LABEL: test_mm_cvtph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
  return _mm_cvtph_epu16(A);
}

__m128i test_mm_mask_cvtph_epu16(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvtph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
  return _mm_mask_cvtph_epu16(A, B, C);
}

__m128i test_mm_maskz_cvtph_epu16(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvtph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
  return _mm_maskz_cvtph_epu16(A, B);
}

__m256i test_mm256_cvtph_epu16(__m256h A) {
  // CHECK-LABEL: test_mm256_cvtph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
  return _mm256_cvtph_epu16(A);
}

__m256i test_mm256_mask_cvtph_epu16(__m256i A, __mmask16 B, __m256h C) {
  // CHECK-LABEL: test_mm256_mask_cvtph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
  return _mm256_mask_cvtph_epu16(A, B, C);
}

__m256i test_mm256_maskz_cvtph_epu16(__mmask16 A, __m256h B) {
  // CHECK-LABEL: test_mm256_maskz_cvtph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
  return _mm256_maskz_cvtph_epu16(A, B);
}

__m128i test_mm_cvttph_epu16(__m128h A) {
  // CHECK-LABEL: test_mm_cvttph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
  return _mm_cvttph_epu16(A);
}

__m128i test_mm_mask_cvttph_epu16(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvttph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
  return _mm_mask_cvttph_epu16(A, B, C);
}

__m128i test_mm_maskz_cvttph_epu16(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvttph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
  return _mm_maskz_cvttph_epu16(A, B);
}

__m256i test_mm256_cvttph_epu16(__m256h A) {
  // CHECK-LABEL: test_mm256_cvttph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
  return _mm256_cvttph_epu16(A);
}

__m256i test_mm256_mask_cvttph_epu16(__m256i A, __mmask16 B, __m256h C) {
  // CHECK-LABEL: test_mm256_mask_cvttph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
  return _mm256_mask_cvttph_epu16(A, B, C);
}

__m256i test_mm256_maskz_cvttph_epu16(__mmask16 A, __m256h B) {
  // CHECK-LABEL: test_mm256_maskz_cvttph_epu16
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
  return _mm256_maskz_cvttph_epu16(A, B);
}

__m128h test_mm_cvtepu16_ph(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepu16_ph
  // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
  return _mm_cvtepu16_ph(A);
}

__m128h test_mm_mask_cvtepu16_ph(__m128h A, __mmask8 B, __m128i C) {
  // CHECK-LABEL: test_mm_mask_cvtepu16_ph
  // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
  return _mm_mask_cvtepu16_ph(A, B, C);
}

__m128h test_mm_maskz_cvtepu16_ph(__mmask8 A, __m128i B) {
  // CHECK-LABEL: test_mm_maskz_cvtepu16_ph
  // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
  return _mm_maskz_cvtepu16_ph(A, B);
}

__m256h test_mm256_cvtepu16_ph(__m256i A) {
  // CHECK-LABEL: test_mm256_cvtepu16_ph
  // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
  return _mm256_cvtepu16_ph(A);
}

__m256h test_mm256_mask_cvtepu16_ph(__m256h A, __mmask16 B, __m256i C) {
  // CHECK-LABEL: test_mm256_mask_cvtepu16_ph
  // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
  return _mm256_mask_cvtepu16_ph(A, B, C);
}

__m256h test_mm256_maskz_cvtepu16_ph(__mmask16 A, __m256i B) {
  // CHECK-LABEL: test_mm256_maskz_cvtepu16_ph
  // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
  return _mm256_maskz_cvtepu16_ph(A, B);
}

__m128i test_mm_cvtph_epi32(__m128h A) {
  // CHECK-LABEL: test_mm_cvtph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
  return _mm_cvtph_epi32(A);
}

__m128i test_mm_mask_cvtph_epi32(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvtph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
  return _mm_mask_cvtph_epi32(A, B, C);
}

__m128i test_mm_maskz_cvtph_epi32(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvtph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
  return _mm_maskz_cvtph_epi32(A, B);
}

__m256i test_mm256_cvtph_epi32(__m128h A) {
  // CHECK-LABEL: test_mm256_cvtph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
  return _mm256_cvtph_epi32(A);
}

__m256i test_mm256_mask_cvtph_epi32(__m256i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm256_mask_cvtph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
  return _mm256_mask_cvtph_epi32(A, B, C);
}

__m256i test_mm256_maskz_cvtph_epi32(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm256_maskz_cvtph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
  return _mm256_maskz_cvtph_epi32(A, B);
}

__m128i test_mm_cvtph_epu32(__m128h A) {
  // CHECK-LABEL: test_mm_cvtph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
  return _mm_cvtph_epu32(A);
}

__m128i test_mm_mask_cvtph_epu32(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvtph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
  return _mm_mask_cvtph_epu32(A, B, C);
}

__m128i test_mm_maskz_cvtph_epu32(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvtph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
  return _mm_maskz_cvtph_epu32(A, B);
}

__m256i test_mm256_cvtph_epu32(__m128h A) {
  // CHECK-LABEL: test_mm256_cvtph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
  return _mm256_cvtph_epu32(A);
}

__m256i test_mm256_mask_cvtph_epu32(__m256i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm256_mask_cvtph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
  return _mm256_mask_cvtph_epu32(A, B, C);
}

__m256i test_mm256_maskz_cvtph_epu32(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm256_maskz_cvtph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
  return _mm256_maskz_cvtph_epu32(A, B);
}
__m128h test_mm_cvtepi32_ph(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi32_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
  return _mm_cvtepi32_ph(A);
}

__m128h test_mm_mask_cvtepi32_ph(__m128h A, __mmask8 B, __m128i C) {
  // CHECK-LABEL: test_mm_mask_cvtepi32_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
  return _mm_mask_cvtepi32_ph(A, B, C);
}

__m128h test_mm_maskz_cvtepi32_ph(__mmask8 A, __m128i B) {
  // CHECK-LABEL: test_mm_maskz_cvtepi32_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
  return _mm_maskz_cvtepi32_ph(A, B);
}

__m128h test_mm256_cvtepi32_ph(__m256i A) {
  // CHECK-LABEL: test_mm256_cvtepi32_ph
  // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
  return _mm256_cvtepi32_ph(A);
}

__m128h test_mm256_mask_cvtepi32_ph(__m128h A, __mmask8 B, __m256i C) {
  // CHECK-LABEL: test_mm256_mask_cvtepi32_ph
  // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
  return _mm256_mask_cvtepi32_ph(A, B, C);
}

__m128h test_mm256_maskz_cvtepi32_ph(__mmask8 A, __m256i B) {
  // CHECK-LABEL: test_mm256_maskz_cvtepi32_ph
  // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
  return _mm256_maskz_cvtepi32_ph(A, B);
}

__m128h test_mm_cvtepu32_ph(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepu32_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
  return _mm_cvtepu32_ph(A);
}

__m128h test_mm_mask_cvtepu32_ph(__m128h A, __mmask8 B, __m128i C) {
  // CHECK-LABEL: test_mm_mask_cvtepu32_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
  return _mm_mask_cvtepu32_ph(A, B, C);
}

__m128h test_mm_maskz_cvtepu32_ph(__mmask8 A, __m128i B) {
  // CHECK-LABEL: test_mm_maskz_cvtepu32_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
  return _mm_maskz_cvtepu32_ph(A, B);
}

__m128h test_mm256_cvtepu32_ph(__m256i A) {
  // CHECK-LABEL: test_mm256_cvtepu32_ph
  // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
  return _mm256_cvtepu32_ph(A);
}

__m128h test_mm256_mask_cvtepu32_ph(__m128h A, __mmask8 B, __m256i C) {
  // CHECK-LABEL: test_mm256_mask_cvtepu32_ph
  // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
  return _mm256_mask_cvtepu32_ph(A, B, C);
}

__m128h test_mm256_maskz_cvtepu32_ph(__mmask8 A, __m256i B) {
  // CHECK-LABEL: test_mm256_maskz_cvtepu32_ph
  // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
  return _mm256_maskz_cvtepu32_ph(A, B);
}

__m128i test_mm_cvttph_epi32(__m128h A) {
  // CHECK-LABEL: test_mm_cvttph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
  return _mm_cvttph_epi32(A);
}

__m128i test_mm_mask_cvttph_epi32(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvttph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
  return _mm_mask_cvttph_epi32(A, B, C);
}

__m128i test_mm_maskz_cvttph_epi32(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvttph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
  return _mm_maskz_cvttph_epi32(A, B);
}

__m256i test_mm256_cvttph_epi32(__m128h A) {
  // CHECK-LABEL: test_mm256_cvttph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
  return _mm256_cvttph_epi32(A);
}

__m256i test_mm256_mask_cvttph_epi32(__m256i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm256_mask_cvttph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
  return _mm256_mask_cvttph_epi32(A, B, C);
}

__m256i test_mm256_maskz_cvttph_epi32(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm256_maskz_cvttph_epi32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
  return _mm256_maskz_cvttph_epi32(A, B);
}

__m128i test_mm_cvttph_epu32(__m128h A) {
  // CHECK-LABEL: test_mm_cvttph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
  return _mm_cvttph_epu32(A);
}

__m128i test_mm_mask_cvttph_epu32(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvttph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
  return _mm_mask_cvttph_epu32(A, B, C);
}

__m128i test_mm_maskz_cvttph_epu32(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvttph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
  return _mm_maskz_cvttph_epu32(A, B);
}

__m256i test_mm256_cvttph_epu32(__m128h A) {
  // CHECK-LABEL: test_mm256_cvttph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
  return _mm256_cvttph_epu32(A);
}

__m256i test_mm256_mask_cvttph_epu32(__m256i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm256_mask_cvttph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
  return _mm256_mask_cvttph_epu32(A, B, C);
}

__m256i test_mm256_maskz_cvttph_epu32(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm256_maskz_cvttph_epu32
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
  return _mm256_maskz_cvttph_epu32(A, B);
}

__m128h test_mm_cvtepi64_ph(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepi64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
  return _mm_cvtepi64_ph(A);
}

__m128h test_mm_mask_cvtepi64_ph(__m128h A, __mmask8 B, __m128i C) {
  // CHECK-LABEL: test_mm_mask_cvtepi64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
  return _mm_mask_cvtepi64_ph(A, B, C);
}

__m128h test_mm_maskz_cvtepi64_ph(__mmask8 A, __m128i B) {
  // CHECK-LABEL: test_mm_maskz_cvtepi64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
  return _mm_maskz_cvtepi64_ph(A, B);
}

__m128h test_mm256_cvtepi64_ph(__m256i A) {
  // CHECK-LABEL: test_mm256_cvtepi64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
  return _mm256_cvtepi64_ph(A);
}

__m128h test_mm256_mask_cvtepi64_ph(__m128h A, __mmask8 B, __m256i C) {
  // CHECK-LABEL: test_mm256_mask_cvtepi64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
  return _mm256_mask_cvtepi64_ph(A, B, C);
}

__m128h test_mm256_maskz_cvtepi64_ph(__mmask8 A, __m256i B) {
  // CHECK-LABEL: test_mm256_maskz_cvtepi64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
  return _mm256_maskz_cvtepi64_ph(A, B);
}

__m128i test_mm_cvtph_epi64(__m128h A) {
  // CHECK-LABEL: test_mm_cvtph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
  return _mm_cvtph_epi64(A);
}

__m128i test_mm_mask_cvtph_epi64(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvtph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
  return _mm_mask_cvtph_epi64(A, B, C);
}

__m128i test_mm_maskz_cvtph_epi64(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvtph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
  return _mm_maskz_cvtph_epi64(A, B);
}

__m256i test_mm256_cvtph_epi64(__m128h A) {
  // CHECK-LABEL: test_mm256_cvtph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
  return _mm256_cvtph_epi64(A);
}

__m256i test_mm256_mask_cvtph_epi64(__m256i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm256_mask_cvtph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
  return _mm256_mask_cvtph_epi64(A, B, C);
}

__m256i test_mm256_maskz_cvtph_epi64(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm256_maskz_cvtph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
  return _mm256_maskz_cvtph_epi64(A, B);
}

__m128h test_mm_cvtepu64_ph(__m128i A) {
  // CHECK-LABEL: test_mm_cvtepu64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
  return _mm_cvtepu64_ph(A);
}

__m128h test_mm_mask_cvtepu64_ph(__m128h A, __mmask8 B, __m128i C) {
  // CHECK-LABEL: test_mm_mask_cvtepu64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
  return _mm_mask_cvtepu64_ph(A, B, C);
}

__m128h test_mm_maskz_cvtepu64_ph(__mmask8 A, __m128i B) {
  // CHECK-LABEL: test_mm_maskz_cvtepu64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
  return _mm_maskz_cvtepu64_ph(A, B);
}

__m128h test_mm256_cvtepu64_ph(__m256i A) {
  // CHECK-LABEL: test_mm256_cvtepu64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
  return _mm256_cvtepu64_ph(A);
}

__m128h test_mm256_mask_cvtepu64_ph(__m128h A, __mmask8 B, __m256i C) {
  // CHECK-LABEL: test_mm256_mask_cvtepu64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
  return _mm256_mask_cvtepu64_ph(A, B, C);
}

__m128h test_mm256_maskz_cvtepu64_ph(__mmask8 A, __m256i B) {
  // CHECK-LABEL: test_mm256_maskz_cvtepu64_ph
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
  return _mm256_maskz_cvtepu64_ph(A, B);
}

__m128i test_mm_cvtph_epu64(__m128h A) {
  // CHECK-LABEL: test_mm_cvtph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
  return _mm_cvtph_epu64(A);
}

__m128i test_mm_mask_cvtph_epu64(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvtph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
  return _mm_mask_cvtph_epu64(A, B, C);
}

__m128i test_mm_maskz_cvtph_epu64(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvtph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
  return _mm_maskz_cvtph_epu64(A, B);
}

__m256i test_mm256_cvtph_epu64(__m128h A) {
  // CHECK-LABEL: test_mm256_cvtph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
  return _mm256_cvtph_epu64(A);
}

__m256i test_mm256_mask_cvtph_epu64(__m256i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm256_mask_cvtph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
  return _mm256_mask_cvtph_epu64(A, B, C);
}

__m256i test_mm256_maskz_cvtph_epu64(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm256_maskz_cvtph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
  return _mm256_maskz_cvtph_epu64(A, B);
}

__m128i test_mm_cvttph_epi64(__m128h A) {
  // CHECK-LABEL: test_mm_cvttph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
  return _mm_cvttph_epi64(A);
}

__m128i test_mm_mask_cvttph_epi64(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvttph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
  return _mm_mask_cvttph_epi64(A, B, C);
}

__m128i test_mm_maskz_cvttph_epi64(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvttph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
  return _mm_maskz_cvttph_epi64(A, B);
}

__m256i test_mm256_cvttph_epi64(__m128h A) {
  // CHECK-LABEL: test_mm256_cvttph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
  return _mm256_cvttph_epi64(A);
}

__m256i test_mm256_mask_cvttph_epi64(__m256i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm256_mask_cvttph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
  return _mm256_mask_cvttph_epi64(A, B, C);
}

__m256i test_mm256_maskz_cvttph_epi64(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm256_maskz_cvttph_epi64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
  return _mm256_maskz_cvttph_epi64(A, B);
}

__m128i test_mm_cvttph_epu64(__m128h A) {
  // CHECK-LABEL: test_mm_cvttph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
  return _mm_cvttph_epu64(A);
}

__m128i test_mm_mask_cvttph_epu64(__m128i A, __mmask8 B, __m128h C) {
  // CHECK-LABEL: test_mm_mask_cvttph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
  return _mm_mask_cvttph_epu64(A, B, C);
}

__m128i test_mm_maskz_cvttph_epu64(__mmask8 A, __m128h B) {
  // CHECK-LABEL: test_mm_maskz_cvttph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
  return _mm_maskz_cvttph_epu64(A, B);
}

__m256i test_mm256_cvttph_epu64(__m128h A) {
  // CHECK-LABEL: test_mm256_cvttph_epu64
  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
  return _mm256_cvttph_epu64(A);
}

__m256i test_mm256_mask_cvttph_epu64(__m256i A, __mmask8 B, __m128h C) {
|
||||
// CHECK-LABEL: test_mm256_mask_cvttph_epu64
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
|
||||
return _mm256_mask_cvttph_epu64(A, B, C);
|
||||
}
|
||||
|
||||
__m256i test_mm256_maskz_cvttph_epu64(__mmask8 A, __m128h B) {
|
||||
// CHECK-LABEL: test_mm256_maskz_cvttph_epu64
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
|
||||
return _mm256_maskz_cvttph_epu64(A, B);
|
||||
}
|
||||
|
||||
__m128 test_mm_cvtxph_ps(__m128h A) {
|
||||
// CHECK-LABEL: test_mm_cvtxph_ps
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
|
||||
return _mm_cvtxph_ps(A);
|
||||
}
|
||||
|
||||
__m128 test_mm_mask_cvtxph_ps(__m128 A, __mmask8 B, __m128h C) {
|
||||
// CHECK-LABEL: test_mm_mask_cvtxph_ps
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
|
||||
return _mm_mask_cvtxph_ps(A, B, C);
|
||||
}
|
||||
|
||||
__m128 test_mm_maskz_cvtxph_ps(__mmask8 A, __m128h B) {
|
||||
// CHECK-LABEL: test_mm_maskz_cvtxph_ps
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
|
||||
return _mm_maskz_cvtxph_ps(A, B);
|
||||
}
|
||||
|
||||
__m256 test_mm256_cvtxph_ps(__m128h A) {
|
||||
// CHECK-LABEL: test_mm256_cvtxph_ps
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
|
||||
return _mm256_cvtxph_ps(A);
|
||||
}
|
||||
|
||||
__m256 test_mm256_mask_cvtxph_ps(__m256 A, __mmask8 B, __m128h C) {
|
||||
// CHECK-LABEL: test_mm256_mask_cvtxph_ps
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
|
||||
return _mm256_mask_cvtxph_ps(A, B, C);
|
||||
}
|
||||
|
||||
__m256 test_mm256_maskz_cvtxph_ps(__mmask8 A, __m128h B) {
|
||||
// CHECK-LABEL: test_mm256_maskz_cvtxph_ps
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
|
||||
return _mm256_maskz_cvtxph_ps(A, B);
|
||||
}
|
||||
|
||||
__m128h test_mm_cvtxps_ph(__m128 A) {
|
||||
// CHECK-LABEL: test_mm_cvtxps_ph
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
|
||||
return _mm_cvtxps_ph(A);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask_cvtxps_ph(__m128h A, __mmask8 B, __m128 C) {
|
||||
// CHECK-LABEL: test_mm_mask_cvtxps_ph
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
|
||||
return _mm_mask_cvtxps_ph(A, B, C);
|
||||
}
|
||||
|
||||
__m128h test_mm_maskz_cvtxps_ph(__mmask8 A, __m128 B) {
|
||||
// CHECK-LABEL: test_mm_maskz_cvtxps_ph
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
|
||||
return _mm_maskz_cvtxps_ph(A, B);
|
||||
}
|
||||
|
||||
__m128h test_mm256_cvtxps_ph(__m256 A) {
|
||||
// CHECK-LABEL: test_mm256_cvtxps_ph
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
|
||||
return _mm256_cvtxps_ph(A);
|
||||
}
|
||||
|
||||
__m128h test_mm256_mask_cvtxps_ph(__m128h A, __mmask8 B, __m256 C) {
|
||||
// CHECK-LABEL: test_mm256_mask_cvtxps_ph
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
|
||||
return _mm256_mask_cvtxps_ph(A, B, C);
|
||||
}
|
||||
|
||||
__m128h test_mm256_maskz_cvtxps_ph(__mmask8 A, __m256 B) {
|
||||
// CHECK-LABEL: test_mm256_maskz_cvtxps_ph
|
||||
// CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
|
||||
return _mm256_maskz_cvtxps_ph(A, B);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
|
||||
// CHECK-LABEL: @test_mm_mask_blend_ph
|
||||
// CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
|
||||
|
|
|
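A minimal usage sketch (not part of the patch) of the masked conversions the
tests above exercise; it assumes a toolchain carrying this patch and the
flags -mavx512fp16 -mavx512vl:

#include <immintrin.h>

// Masked form: result lanes whose mask bit is 0 keep the passthru value.
__m128h cvt_epi64_ph_masked(__m128h passthru, __mmask8 m, __m256i v) {
  return _mm256_mask_cvtepi64_ph(passthru, m, v);
}

// Zero-masking form: result lanes whose mask bit is 0 are zeroed.
__m128h cvt_epi64_ph_zeroed(__mmask8 m, __m256i v) {
  return _mm256_maskz_cvtepi64_ph(m, v);
}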
@@ -5224,4 +5224,321 @@ let TargetPrefix = "x86" in {
                Intrinsic<[ llvm_i32_ty ],
                          [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
                          [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>> ]>;

  def int_x86_avx512fp16_mask_vcvtph2psx_128
      : GCCBuiltin<"__builtin_ia32_vcvtph2psx128_mask">,
        Intrinsic<[ llvm_v4f32_ty ],
                  [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2psx_256
      : GCCBuiltin<"__builtin_ia32_vcvtph2psx256_mask">,
        Intrinsic<[ llvm_v8f32_ty ],
                  [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2psx_512
      : GCCBuiltin<"__builtin_ia32_vcvtph2psx512_mask">,
        Intrinsic<[ llvm_v16f32_ty ],
                  [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvtps2phx_128
      : GCCBuiltin<"__builtin_ia32_vcvtps2phx128_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtps2phx_256
      : GCCBuiltin<"__builtin_ia32_vcvtps2phx256_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtps2phx_512
      : GCCBuiltin<"__builtin_ia32_vcvtps2phx512_mask">,
        Intrinsic<[ llvm_v16f16_ty ],
                  [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvtpd2ph_128
      : GCCBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtpd2ph_256
      : GCCBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtpd2ph_512
      : GCCBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvtph2pd_128
      : GCCBuiltin<"__builtin_ia32_vcvtph2pd128_mask">,
        Intrinsic<[ llvm_v2f64_ty ],
                  [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2pd_256
      : GCCBuiltin<"__builtin_ia32_vcvtph2pd256_mask">,
        Intrinsic<[ llvm_v4f64_ty ],
                  [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2pd_512
      : GCCBuiltin<"__builtin_ia32_vcvtph2pd512_mask">,
        Intrinsic<[ llvm_v8f64_ty ],
                  [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvtsh2ss_round
      : GCCBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">,
        Intrinsic<[ llvm_v4f32_ty ],
                  [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty,
                    llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
  def int_x86_avx512fp16_mask_vcvtss2sh_round
      : GCCBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty,
                    llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
  def int_x86_avx512fp16_mask_vcvtsd2sh_round
      : GCCBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty,
                    llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
  def int_x86_avx512fp16_mask_vcvtsh2sd_round
      : GCCBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">,
        Intrinsic<[ llvm_v2f64_ty ],
                  [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty,
                    llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;

  def int_x86_avx512fp16_mask_vcvtph2w_128
      : GCCBuiltin<"__builtin_ia32_vcvtph2w128_mask">,
        Intrinsic<[ llvm_v8i16_ty ],
                  [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2w_256
      : GCCBuiltin<"__builtin_ia32_vcvtph2w256_mask">,
        Intrinsic<[ llvm_v16i16_ty ],
                  [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
                  [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2w_512
      : GCCBuiltin<"__builtin_ia32_vcvtph2w512_mask">,
        Intrinsic<[ llvm_v32i16_ty ],
                  [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvttph2w_128
      : GCCBuiltin<"__builtin_ia32_vcvttph2w128_mask">,
        Intrinsic<[ llvm_v8i16_ty ],
                  [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2w_256
      : GCCBuiltin<"__builtin_ia32_vcvttph2w256_mask">,
        Intrinsic<[ llvm_v16i16_ty ],
                  [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
                  [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2w_512
      : GCCBuiltin<"__builtin_ia32_vcvttph2w512_mask">,
        Intrinsic<[ llvm_v32i16_ty ],
                  [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvtph2uw_128
      : GCCBuiltin<"__builtin_ia32_vcvtph2uw128_mask">,
        Intrinsic<[ llvm_v8i16_ty ],
                  [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2uw_256
      : GCCBuiltin<"__builtin_ia32_vcvtph2uw256_mask">,
        Intrinsic<[ llvm_v16i16_ty ],
                  [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
                  [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2uw_512
      : GCCBuiltin<"__builtin_ia32_vcvtph2uw512_mask">,
        Intrinsic<[ llvm_v32i16_ty ],
                  [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvttph2uw_128
      : GCCBuiltin<"__builtin_ia32_vcvttph2uw128_mask">,
        Intrinsic<[ llvm_v8i16_ty ],
                  [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2uw_256
      : GCCBuiltin<"__builtin_ia32_vcvttph2uw256_mask">,
        Intrinsic<[ llvm_v16i16_ty ],
                  [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
                  [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2uw_512
      : GCCBuiltin<"__builtin_ia32_vcvttph2uw512_mask">,
        Intrinsic<[ llvm_v32i16_ty ],
                  [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;

  def int_x86_avx512fp16_mask_vcvtph2dq_128
      : GCCBuiltin<"__builtin_ia32_vcvtph2dq128_mask">,
        Intrinsic<[ llvm_v4i32_ty ],
                  [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2dq_256
      : GCCBuiltin<"__builtin_ia32_vcvtph2dq256_mask">,
        Intrinsic<[ llvm_v8i32_ty ],
                  [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2dq_512
      : GCCBuiltin<"__builtin_ia32_vcvtph2dq512_mask">,
        Intrinsic<[ llvm_v16i32_ty ],
                  [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvtph2udq_128
      : GCCBuiltin<"__builtin_ia32_vcvtph2udq128_mask">,
        Intrinsic<[ llvm_v4i32_ty ],
                  [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2udq_256
      : GCCBuiltin<"__builtin_ia32_vcvtph2udq256_mask">,
        Intrinsic<[ llvm_v8i32_ty ],
                  [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2udq_512
      : GCCBuiltin<"__builtin_ia32_vcvtph2udq512_mask">,
        Intrinsic<[ llvm_v16i32_ty ],
                  [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvtdq2ph_128
      : GCCBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtudq2ph_128
      : GCCBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2dq_128
      : GCCBuiltin<"__builtin_ia32_vcvttph2dq128_mask">,
        Intrinsic<[ llvm_v4i32_ty ],
                  [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2dq_256
      : GCCBuiltin<"__builtin_ia32_vcvttph2dq256_mask">,
        Intrinsic<[ llvm_v8i32_ty ],
                  [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2dq_512
      : GCCBuiltin<"__builtin_ia32_vcvttph2dq512_mask">,
        Intrinsic<[ llvm_v16i32_ty ],
                  [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvttph2udq_128
      : GCCBuiltin<"__builtin_ia32_vcvttph2udq128_mask">,
        Intrinsic<[ llvm_v4i32_ty ],
                  [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2udq_256
      : GCCBuiltin<"__builtin_ia32_vcvttph2udq256_mask">,
        Intrinsic<[ llvm_v8i32_ty ],
                  [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2udq_512
      : GCCBuiltin<"__builtin_ia32_vcvttph2udq512_mask">,
        Intrinsic<[ llvm_v16i32_ty ],
                  [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;

  def int_x86_avx512fp16_mask_vcvtqq2ph_128
      : GCCBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtqq2ph_256
      : GCCBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2qq_128
      : GCCBuiltin<"__builtin_ia32_vcvtph2qq128_mask">,
        Intrinsic<[ llvm_v2i64_ty ],
                  [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2qq_256
      : GCCBuiltin<"__builtin_ia32_vcvtph2qq256_mask">,
        Intrinsic<[ llvm_v4i64_ty ],
                  [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2qq_512
      : GCCBuiltin<"__builtin_ia32_vcvtph2qq512_mask">,
        Intrinsic<[ llvm_v8i64_ty ],
                  [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvtuqq2ph_128
      : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtuqq2ph_256
      : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2uqq_128
      : GCCBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">,
        Intrinsic<[ llvm_v2i64_ty ],
                  [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2uqq_256
      : GCCBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">,
        Intrinsic<[ llvm_v4i64_ty ],
                  [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvtph2uqq_512
      : GCCBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">,
        Intrinsic<[ llvm_v8i64_ty ],
                  [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvttph2qq_128
      : GCCBuiltin<"__builtin_ia32_vcvttph2qq128_mask">,
        Intrinsic<[ llvm_v2i64_ty ],
                  [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2qq_256
      : GCCBuiltin<"__builtin_ia32_vcvttph2qq256_mask">,
        Intrinsic<[ llvm_v4i64_ty ],
                  [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2qq_512
      : GCCBuiltin<"__builtin_ia32_vcvttph2qq512_mask">,
        Intrinsic<[ llvm_v8i64_ty ],
                  [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
  def int_x86_avx512fp16_mask_vcvttph2uqq_128
      : GCCBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">,
        Intrinsic<[ llvm_v2i64_ty ],
                  [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2uqq_256
      : GCCBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">,
        Intrinsic<[ llvm_v4i64_ty ],
                  [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
  def int_x86_avx512fp16_mask_vcvttph2uqq_512
      : GCCBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">,
        Intrinsic<[ llvm_v8i64_ty ],
                  [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;

  def int_x86_avx512fp16_vcvtsh2si32
      : GCCBuiltin<"__builtin_ia32_vcvtsh2si32">,
        Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
  def int_x86_avx512fp16_vcvtsh2usi32
      : GCCBuiltin<"__builtin_ia32_vcvtsh2usi32">,
        Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
  def int_x86_avx512fp16_vcvtsh2si64
      : GCCBuiltin<"__builtin_ia32_vcvtsh2si64">,
        Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
  def int_x86_avx512fp16_vcvtsh2usi64
      : GCCBuiltin<"__builtin_ia32_vcvtsh2usi64">,
        Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
  def int_x86_avx512fp16_vcvtusi2sh
      : GCCBuiltin<"__builtin_ia32_vcvtusi2sh">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
  def int_x86_avx512fp16_vcvtusi642sh
      : GCCBuiltin<"__builtin_ia32_vcvtusi642sh">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
  def int_x86_avx512fp16_vcvtsi2sh
      : GCCBuiltin<"__builtin_ia32_vcvtsi2sh">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
  def int_x86_avx512fp16_vcvtsi642sh
      : GCCBuiltin<"__builtin_ia32_vcvtsi642sh">,
        Intrinsic<[ llvm_v8f16_ty ],
                  [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
  def int_x86_avx512fp16_vcvttsh2si32
      : GCCBuiltin<"__builtin_ia32_vcvttsh2si32">,
        Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
  def int_x86_avx512fp16_vcvttsh2si64
      : GCCBuiltin<"__builtin_ia32_vcvttsh2si64">,
        Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
  def int_x86_avx512fp16_vcvttsh2usi32
      : GCCBuiltin<"__builtin_ia32_vcvttsh2usi32">,
        Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
  def int_x86_avx512fp16_vcvttsh2usi64
      : GCCBuiltin<"__builtin_ia32_vcvttsh2usi64">,
        Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
}
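Each def above pairs an LLVM intrinsic with the Clang builtin named in its
GCCBuiltin<> field. A small sketch (assuming the same toolchain and flags as
the tests earlier) that reaches int_x86_avx512fp16_mask_vcvtph2psx_128
through the corresponding user-level intrinsic:

#include <immintrin.h>

__m128 cvt_low4_ph_to_ps(__m128 passthru, __mmask8 m, __m128h src) {
  // Lowers to @llvm.x86.avx512fp16.mask.vcvtph2psx.128 (defined above),
  // matching the CHECK line in test_mm_mask_cvtxph_ps.
  return _mm_mask_cvtxph_ps(passthru, m, src);
}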
@@ -287,6 +287,7 @@ HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2")
HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2")
HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2")
HANDLE_LIBCALL(FPEXT_F16_F80, "__extendhfxf2")
HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2")
HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")

@@ -237,6 +237,8 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
      return FPEXT_F16_F32;
    if (RetVT == MVT::f64)
      return FPEXT_F16_F64;
    if (RetVT == MVT::f80)
      return FPEXT_F16_F80;
    if (RetVT == MVT::f128)
      return FPEXT_F16_F128;
  } else if (OpVT == MVT::f32) {
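With FPEXT_F16_F80 wired to __extendhfxf2 and getFPEXT returning it, an
f16 -> f80 extension can fall back to a libcall. A hypothetical sketch of
the source pattern this serves (not from the patch):

long double extend_f16_to_f80(_Float16 h) {
  // When no direct instruction is available, this is lowered via
  // RTLIB::getFPEXT(f16, f80) to a call to __extendhfxf2.
  return (long double)h;
}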
@@ -1931,6 +1931,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setOperationAction(ISD::SETCC, MVT::f16, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    if (isTypeLegal(MVT::f80)) {
      setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
      setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
    }

    setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
    setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);

@@ -1939,8 +1946,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setGroup(MVT::v32f16);
    addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
    setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
                               MVT::v32i16);
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
    setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
                               MVT::v32i16);
    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
    setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
                               MVT::v32i16);
    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
    setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
                               MVT::v32i16);

    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
    setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);

@@ -1960,6 +1990,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);

    setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);

    // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);

@@ -2001,6 +2046,37 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
  }

  if (Subtarget.hasFP16()) {
    // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
    // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
    // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
    setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
    // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
    setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
  }

  setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);

@@ -19993,6 +20069,43 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
                     DAG.getIntPtrConstant(0, dl));
}

// Try to use a packed vector operation to handle i64 on 32-bit targets.
static SDValue LowerI64IntToFP16(SDValue Op, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::SINT_TO_FP ||
          Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
          Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
          Op.getOpcode() == ISD::UINT_TO_FP) &&
         "Unexpected opcode!");
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();

  if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
    return SDValue();

  // Pack the i64 into a vector, do the operation and extract.

  assert(Subtarget.hasFP16() && "Expected FP16");

  SDLoc dl(Op);
  SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
  if (IsStrict) {
    SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
                                 {Op.getOperand(0), InVec});
    SDValue Chain = CvtVec.getValue(1);
    SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                                DAG.getIntPtrConstant(0, dl));
    return DAG.getMergeValues({Value, Chain}, dl);
  }

  SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, MVT::v2f16, InVec);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                     DAG.getIntPtrConstant(0, dl));
}

static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
                          const X86Subtarget &Subtarget) {
  switch (Opcode) {
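A sketch of the scalar source pattern LowerI64IntToFP16 above targets
(assumption: a 32-bit x86 build with -mavx512fp16, where i64 is not a legal
scalar type):

#include <stdint.h>

_Float16 i64_to_f16(int64_t x) {
  // SINT_TO_FP i64 -> f16: the operand is packed into a v2i64, converted
  // to v2f16, and element 0 is extracted, avoiding a libcall.
  return (_Float16)x;
}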
@@ -20245,6 +20358,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,

  if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
    return V;
  if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
    return V;

  // SSE doesn't have an i16 conversion so we need to promote.
  if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {

@@ -20724,6 +20839,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,

  if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
    return V;
  if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
    return V;

  // The transform for i64->f64 isn't correct for 0 when rounding to negative
  // infinity. It produces -0.0, so disable under strictfp.

@@ -21505,9 +21622,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  MVT VT = Op->getSimpleValueType(0);
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();
  MVT SrcVT = Src.getSimpleValueType();
  SDLoc dl(Op);

  SDValue Res;
  if (VT.isVector()) {
    if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
      MVT ResVT = MVT::v4i32;

@@ -21532,10 +21651,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
        Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
                          DAG.getIntPtrConstant(0, dl));
      }
      SDValue Res, Chain;
      if (IsStrict) {
        Res =
            DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
        Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(Opc, dl, ResVT, Src);

@@ -21549,6 +21666,67 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
      return Res;
    }

    if (Subtarget.hasFP16() && SrcVT.getVectorElementType() == MVT::f16) {
      if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
        return Op;

      MVT ResVT = VT;
      MVT EleVT = VT.getVectorElementType();
      if (EleVT != MVT::i64)
        ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;

      if (SrcVT != MVT::v8f16) {
        SDValue Tmp =
            IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
        SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
        Ops[0] = Src;
        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
      }

      if (IsStrict) {
        Res = DAG.getNode(IsSigned ? X86ISD::STRICT_CVTTP2SI
                                   : X86ISD::STRICT_CVTTP2UI,
                          dl, {ResVT, MVT::Other}, {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl,
                          ResVT, Src);
      }

      // TODO: Need to add exception check code for strict FP.
      if (EleVT.getSizeInBits() < 16) {
        ResVT = MVT::getVectorVT(EleVT, 8);
        Res = DAG.getNode(ISD::TRUNCATE, dl, ResVT, Res);
      }

      if (ResVT != VT)
        Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
                          DAG.getIntPtrConstant(0, dl));

      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
      if (IsStrict) {
        Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
                                   : ISD::STRICT_FP_TO_UINT,
                          dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
                          MVT::v8i32, Src);
      }

      // TODO: Need to add exception check code for strict FP.
      Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);

      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.
    if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
      assert(!IsSigned && "Expected unsigned conversion!");

@@ -21572,10 +21750,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
                        DAG.getIntPtrConstant(0, dl));

      SDValue Res, Chain;
      if (IsStrict) {
        Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
                          {Op->getOperand(0), Src});
                          {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);

@@ -21603,10 +21780,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
                        DAG.getIntPtrConstant(0, dl));

      SDValue Res, Chain;
      if (IsStrict) {
        Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
                          {Op->getOperand(0), Src});
                          {Chain, Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);

@@ -21631,7 +21807,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
      SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
                                {Src, Zero, Zero, Zero});
      Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
                        {Op->getOperand(0), Tmp});
                        {Chain, Tmp});
      SDValue Chain = Tmp.getValue(1);
      Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
                        DAG.getIntPtrConstant(0, dl));

@@ -21714,17 +21890,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  // FIXME: This does not generate an invalid exception if the input does not
  // fit in i32. PR44019
  if (Subtarget.is64Bit()) {
    SDValue Res, Chain;
    if (IsStrict) {
      Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i64, MVT::Other},
                        { Op.getOperand(0), Src });
      Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i64, MVT::Other},
                        {Chain, Src});
      Chain = Res.getValue(1);
    } else
      Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);

    Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    if (IsStrict)
      return DAG.getMergeValues({ Res, Chain }, dl);
      return DAG.getMergeValues({Res, Chain}, dl);
    return Res;
  }

@@ -21739,17 +21914,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  // fit in i16. PR44019
  if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
    assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
    SDValue Res, Chain;
    if (IsStrict) {
      Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i32, MVT::Other},
                        { Op.getOperand(0), Src });
      Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other},
                        {Chain, Src});
      Chain = Res.getValue(1);
    } else
      Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);

    Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    if (IsStrict)
      return DAG.getMergeValues({ Res, Chain }, dl);
      return DAG.getMergeValues({Res, Chain}, dl);
    return Res;
  }

@@ -21765,7 +21939,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  else
    LC = RTLIB::getFPTOUINT(SrcVT, VT);

  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  MakeLibCallOptions CallOptions;
  std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
                                                SDLoc(Op), Chain);

@@ -21777,7 +21950,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  }

  // Fall back to X87.
  SDValue Chain;
  if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
    if (IsStrict)
      return DAG.getMergeValues({V, Chain}, dl);

@@ -22004,6 +22176,35 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  if (VT == MVT::f128)
    return SDValue();

  if (VT == MVT::f80) {
    if (SVT == MVT::f16) {
      assert(Subtarget.hasFP16() && "Unexpected features!");
      RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
      MakeLibCallOptions CallOptions;
      std::pair<SDValue, SDValue> Tmp =
          makeLibCall(DAG, LC, VT, In, CallOptions, DL,
                      IsStrict ? Op.getOperand(0) : SDValue());
      if (IsStrict)
        return DAG.getMergeValues({Tmp.first, Tmp.second}, DL);
      else
        return Tmp.first;
    }
    return Op;
  }

  if (SVT.getVectorElementType() == MVT::f16) {
    assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
    if (SVT == MVT::v2f16)
      In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In,
                       DAG.getUNDEF(MVT::v2f16));
    SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f16, In,
                              DAG.getUNDEF(MVT::v4f16));
    if (IsStrict)
      return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
                         {Op->getOperand(0), Res});
    return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
  }

  assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");

  SDValue Res =

@@ -22017,8 +22218,11 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
  // It's legal except when f128 is involved
  if (In.getSimpleValueType() != MVT::f128)
  MVT VT = Op.getSimpleValueType();
  MVT SVT = In.getSimpleValueType();

  // It's legal except when f128 is involved or we're converting f80->f16.
  if (SVT != MVT::f128 && !(VT == MVT::f16 && SVT == MVT::f80))
    return Op;

  return SDValue();

@@ -31113,6 +31317,51 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
    EVT SrcVT = Src.getValueType();

    if (VT.isVector() && Subtarget.hasFP16() &&
        SrcVT.getVectorElementType() == MVT::f16) {
      EVT EleVT = VT.getVectorElementType();
      EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;

      if (SrcVT != MVT::v8f16) {
        SDValue Tmp =
            IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
        SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
        Ops[0] = Src;
        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
      }

      SDValue Res, Chain;
      if (IsStrict) {
        unsigned Opc =
            IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
        Res =
            DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src});
        Chain = Res.getValue(1);
      } else {
        unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
        Res = DAG.getNode(Opc, dl, ResVT, Src);
      }

      // TODO: Need to add exception check code for strict FP.
      if (EleVT.getSizeInBits() < 16) {
        MVT TmpVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8);
        Res = DAG.getNode(ISD::TRUNCATE, dl, TmpVT, Res);

        // Now widen to 128 bits.
        unsigned NumConcats = 128 / TmpVT.getSizeInBits();
        MVT ConcatVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8 * NumConcats);
        SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(TmpVT));
        ConcatOps[0] = Res;
        Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
      }

      Results.push_back(Res);
      if (IsStrict)
        Results.push_back(Chain);

      return;
    }

    if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
      assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
             "Unexpected type action!");

@@ -31287,9 +31536,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
                    N->getOpcode() == ISD::STRICT_SINT_TO_FP;
    EVT VT = N->getValueType(0);
    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
    if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() &&
        Subtarget.hasVLX()) {
      if (Src.getValueType().getVectorElementType() == MVT::i16)
        return;

      if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)
        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
                          IsStrict ? DAG.getConstant(0, dl, MVT::v2i32)
                                   : DAG.getUNDEF(MVT::v2i32));
      if (IsStrict) {
        unsigned Opc =
            IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P;
        SDValue Res = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
                                  {N->getOperand(0), Src});
        Results.push_back(Res);
        Results.push_back(Res.getValue(1));
      } else {
        unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P;
        Results.push_back(DAG.getNode(Opc, dl, MVT::v8f16, Src));
      }
      return;
    }
    if (VT != MVT::v2f32)
      return;
    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
    EVT SrcVT = Src.getValueType();
    if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
      if (IsStrict) {

@@ -31390,14 +31661,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
  case ISD::FP_ROUND: {
    bool IsStrict = N->isStrictFPOpcode();
    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
    EVT VT = N->getValueType(0);
    EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;
    if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
      SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32)
                             : DAG.getUNDEF(MVT::v2f32);
      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext);
    }
    if (!isTypeLegal(Src.getValueType()))
      return;
    SDValue V;
    if (IsStrict)
      V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
                      {N->getOperand(0), N->getOperand(1)});
      V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other},
                      {N->getOperand(0), Src});
    else
      V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
      V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src);
    Results.push_back(V);
    if (IsStrict)
      Results.push_back(V.getValue(1));

@@ -31409,6 +31687,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    // No other ValueType for FP_EXTEND should reach this point.
    assert(N->getValueType(0) == MVT::v2f32 &&
           "Do not know how to legalize this Node");
    if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
      return;
    bool IsStrict = N->isStrictFPOpcode();
    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
    SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16)
                           : DAG.getUNDEF(MVT::v2f16);
    SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext);
    if (IsStrict)
      V = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::v4f32, MVT::Other},
                      {N->getOperand(0), V});
    else
      V = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, V);
    Results.push_back(V);
    if (IsStrict)
      Results.push_back(V.getValue(1));
    return;
  }
  case ISD::INTRINSIC_W_CHAIN: {

@@ -49415,10 +49708,31 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
  EVT VT = N->getValueType(0);
  EVT InVT = Op0.getValueType();

  // UINT_TO_FP(vXi1~15)  -> UINT_TO_FP(ZEXT(vXi1~15  to vXi16))
  // UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
  // UINT_TO_FP(vXi33~63) -> UINT_TO_FP(ZEXT(vXi33~63 to vXi64))
  if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
    unsigned ScalarSize = InVT.getScalarSizeInBits();
    if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
      return SDValue();
    SDLoc dl(N);
    EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
                                 ScalarSize < 16   ? MVT::i16
                                 : ScalarSize < 32 ? MVT::i32
                                                   : MVT::i64,
                                 InVT.getVectorNumElements());
    SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other},
                         {N->getOperand(0), P});
    return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
  }

  // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
  // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
  // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
      VT.getScalarType() != MVT::f16) {
    SDLoc dl(N);
    EVT DstVT = InVT.changeVectorElementType(MVT::i32);
    SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
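The promotion above means narrow unsigned integer vectors reach the new
word-to-half conversions after a zero extend. A sketch using GNU vector
extensions (assumes a compiler where _Float16 vectors are available):

typedef unsigned char u8x8 __attribute__((vector_size(8)));
typedef _Float16 f16x8 __attribute__((vector_size(16)));

f16x8 u8_to_f16(u8x8 v) {
  // UINT_TO_FP(v8i8) -> UINT_TO_FP(ZEXT(v8i8 to v8i16)), per the combine.
  return __builtin_convertvector(v, f16x8);
}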
@@ -49457,10 +49771,31 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
  EVT VT = N->getValueType(0);
  EVT InVT = Op0.getValueType();

  // SINT_TO_FP(vXi1~15)  -> SINT_TO_FP(SEXT(vXi1~15  to vXi16))
  // SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
  // SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))
  if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
    unsigned ScalarSize = InVT.getScalarSizeInBits();
    if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
      return SDValue();
    SDLoc dl(N);
    EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
                                 ScalarSize < 16   ? MVT::i16
                                 : ScalarSize < 32 ? MVT::i32
                                                   : MVT::i64,
                                 InVT.getVectorNumElements());
    SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                         {N->getOperand(0), P});
    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
  }

  // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
  // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
  // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
      VT.getScalarType() != MVT::f16) {
    SDLoc dl(N);
    EVT DstVT = InVT.changeVectorElementType(MVT::i32);
    SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);

@@ -51306,6 +51641,9 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
  if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
    return SDValue();

  if (Subtarget.hasFP16())
    return SDValue();

  bool IsStrict = N->isStrictFPOpcode();
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(IsStrict ? 1 : 0);

@@ -51414,6 +51752,9 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
  if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
    return SDValue();

  if (Subtarget.hasFP16())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();

File diff suppressed because it is too large
@ -4455,8 +4455,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmk, 0 },
|
||||
{ X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmk, 0 },
|
||||
{ X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmk, 0 },
|
||||
{ X86::VCVTSD2SHZrr_Intk, X86::VCVTSD2SHZrm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VCVTSD2SSZrr_Intk, X86::VCVTSD2SSZrm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VCVTSH2SDZrr_Intk, X86::VCVTSH2SDZrm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VCVTSH2SSZrr_Intk, X86::VCVTSH2SSZrm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VCVTSS2SDZrr_Intk, X86::VCVTSS2SDZrm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VCVTSS2SHZrr_Intk, X86::VCVTSS2SHZrm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0 },
|
||||
{ X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0 },
|
||||
{ X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0 },
|
||||
|
|
|
@ -130,14 +130,12 @@ def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
|
|||
def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
|
||||
|
||||
def X86vfpext : SDNode<"X86ISD::VFPEXT",
|
||||
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
|
||||
SDTCVecEltisVT<1, f32>,
|
||||
SDTCisSameSizeAs<0, 1>]>>;
|
||||
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisFP<1>, SDTCisVec<1>]>>;
|
||||
|
||||
def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
|
||||
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
|
||||
SDTCVecEltisVT<1, f32>,
|
||||
SDTCisSameSizeAs<0, 1>]>,
|
||||
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisFP<1>, SDTCisVec<1>]>,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
def X86any_vfpext : PatFrags<(ops node:$src),
|
||||
|
@ -145,13 +143,13 @@ def X86any_vfpext : PatFrags<(ops node:$src),
|
|||
(X86vfpext node:$src)]>;
|
||||
|
||||
def X86vfpround: SDNode<"X86ISD::VFPROUND",
|
||||
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
|
||||
SDTCVecEltisVT<1, f64>,
|
||||
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisFP<1>, SDTCisVec<1>,
|
||||
SDTCisOpSmallerThanOp<0, 1>]>>;
|
||||
|
||||
def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
|
||||
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
|
||||
SDTCVecEltisVT<1, f64>,
|
||||
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisFP<1>, SDTCisVec<1>,
|
||||
SDTCisOpSmallerThanOp<0, 1>]>,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
|
@ -160,33 +158,32 @@ def X86any_vfpround : PatFrags<(ops node:$src),
|
|||
(X86vfpround node:$src)]>;
|
||||
|
||||
def X86frounds : SDNode<"X86ISD::VFPROUNDS",
|
||||
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
|
||||
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisSameAs<0, 1>,
|
||||
SDTCVecEltisVT<2, f64>,
|
||||
SDTCisFP<2>, SDTCisVec<2>,
|
||||
SDTCisSameSizeAs<0, 2>]>>;
|
||||
|
||||
def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND",
|
||||
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
|
||||
SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisSameAs<0, 1>,
|
||||
SDTCVecEltisVT<2, f64>,
|
||||
SDTCisFP<2>, SDTCisVec<2>,
|
||||
SDTCisSameSizeAs<0, 2>,
|
||||
SDTCisVT<3, i32>]>>;
|
||||
|
||||
def X86fpexts : SDNode<"X86ISD::VFPEXTS",
|
||||
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
|
||||
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisSameAs<0, 1>,
|
||||
SDTCVecEltisVT<2, f32>,
|
||||
SDTCisFP<2>, SDTCisVec<2>,
|
||||
SDTCisSameSizeAs<0, 2>]>>;
|
||||
def X86fpextsSAE : SDNode<"X86ISD::VFPEXTS_SAE",
|
||||
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
|
||||
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisSameAs<0, 1>,
|
||||
SDTCVecEltisVT<2, f32>,
|
||||
SDTCisFP<2>, SDTCisVec<2>,
|
||||
SDTCisSameSizeAs<0, 2>]>>;
|
||||
|
||||
def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
|
||||
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
|
||||
SDTCVecEltisVT<1, f64>,
|
||||
SDTCisSameSizeAs<0, 1>,
|
||||
SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisFP<1>, SDTCisVec<1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCVecEltisVT<3, i1>,
|
||||
SDTCisSameNumEltsAs<1, 3>]>>;
|
||||
|
@ -709,7 +706,6 @@ def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
|
|||
// Masked versions of above
|
||||
def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
SDTCisFP<0>, SDTCisInt<1>,
|
||||
SDTCisSameSizeAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCVecEltisVT<3, i1>,
|
||||
SDTCisSameNumEltsAs<1, 3>]>;
|
||||
|
@ -757,12 +753,12 @@ def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
|
|||
SDTCVecEltisVT<4, i1>,
|
||||
SDTCisSameNumEltsAs<1, 4>]> >;
|
||||
def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE",
|
||||
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
|
||||
SDTCVecEltisVT<1, f32>,
|
||||
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisFP<1>, SDTCisVec<1>,
|
||||
SDTCisOpSmallerThanOp<1, 0>]>>;
|
||||
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
|
||||
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
|
||||
SDTCVecEltisVT<1, f64>,
|
||||
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
|
||||
SDTCisFP<1>, SDTCisVec<1>,
|
||||
SDTCisOpSmallerThanOp<0, 1>,
|
||||
SDTCisVT<2, i32>]>>;
|
||||
|
||||
|
|
|
@@ -5177,6 +5177,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
  case X86::VCVTUSI642SDZrr_Int:
  case X86::VCVTUSI642SDZrrb_Int:
  case X86::VCVTUSI642SDZrm_Int:
  case X86::VCVTSI2SHZrr:
  case X86::VCVTSI2SHZrm:
  case X86::VCVTSI2SHZrr_Int:
  case X86::VCVTSI2SHZrrb_Int:
  case X86::VCVTSI2SHZrm_Int:
  case X86::VCVTSI642SHZrr:
  case X86::VCVTSI642SHZrm:
  case X86::VCVTSI642SHZrr_Int:
  case X86::VCVTSI642SHZrrb_Int:
  case X86::VCVTSI642SHZrm_Int:
  case X86::VCVTUSI2SHZrr:
  case X86::VCVTUSI2SHZrm:
  case X86::VCVTUSI2SHZrr_Int:
  case X86::VCVTUSI2SHZrrb_Int:
  case X86::VCVTUSI2SHZrm_Int:
  case X86::VCVTUSI642SHZrr:
  case X86::VCVTUSI642SHZrm:
  case X86::VCVTUSI642SHZrr_Int:
  case X86::VCVTUSI642SHZrrb_Int:
  case X86::VCVTUSI642SHZrm_Int:
    // Load folding won't affect the undef register update since the input is
    // a GPR.
    return OpNum == 1 && !ForLoadFold;

@@ -5278,6 +5298,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
  case X86::VSQRTSDZrb_Int:
  case X86::VSQRTSDZm:
  case X86::VSQRTSDZm_Int:
  case X86::VCVTSD2SHZrr:
  case X86::VCVTSD2SHZrr_Int:
  case X86::VCVTSD2SHZrrb_Int:
  case X86::VCVTSD2SHZrm:
  case X86::VCVTSD2SHZrm_Int:
  case X86::VCVTSS2SHZrr:
  case X86::VCVTSS2SHZrr_Int:
  case X86::VCVTSS2SHZrrb_Int:
  case X86::VCVTSS2SHZrm:
  case X86::VCVTSS2SHZrm_Int:
  case X86::VCVTSH2SDZrr:
  case X86::VCVTSH2SDZrr_Int:
  case X86::VCVTSH2SDZrrb_Int:
  case X86::VCVTSH2SDZrm:
  case X86::VCVTSH2SDZrm_Int:
  case X86::VCVTSH2SSZrr:
  case X86::VCVTSH2SSZrr_Int:
  case X86::VCVTSH2SSZrrb_Int:
  case X86::VCVTSH2SSZrm:
  case X86::VCVTSH2SSZrm_Int:
    return OpNum == 1;
  case X86::VMOVSSZrrk:
  case X86::VMOVSDZrrk:

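These scalar FP16 converts write only element 0 of the destination and pass the upper elements of the source operand through, so an undef source creates a false dependency; listing them in hasUndefRegUpdate lets the backend insert a dependency-breaking idiom. A sketch of the shape of code affected, assuming the _mm_cvti32_sh intrinsic name from the Intel AVX512-FP16 specification (not part of this diff):

#include <immintrin.h>

/* Hypothetical sketch: vcvtsi2sh converts the GPR x into element 0 of the
 * result and merges elements 1..7 from %a. If %a is undef, the cases added
 * above allow the undef-register-update pass to break the dependency. */
__m128h int_to_half(__m128h a, int x) {
  return _mm_cvti32_sh(a, x);
}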
@@ -1747,20 +1747,20 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
                       [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
                       VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
                       [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
                       VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;

def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (X86any_vfpround VR256:$src))]>,
                        [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (X86any_vfpround (loadv4f64 addr:$src)))]>,
                        [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]

@@ -1771,11 +1771,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",

def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
                     [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
                     Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
                     [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
                     Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;

//===----------------------------------------------------------------------===//

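The patterns now spell out the v4f32 result type because X86any_vfpround no longer implies it; the selected instruction is unchanged. For reference, the long-standing SSE2 intrinsic these patterns serve (a real intrinsic, shown only to illustrate the semantics the typing now pins down):

#include <immintrin.h>

/* cvtpd2ps narrows two doubles into the low two floats of a v4f32 result
 * and zeroes the upper half; only the TableGen typing changed, not codegen. */
__m128 narrow(__m128d v) {
  return _mm_cvtpd_ps(v);
}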
@@ -1006,6 +1006,117 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                     X86ISD::FMULS, X86ISD::FMULS_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_sub_sh_round, INTR_TYPE_SCALAR_MASK,
                     X86ISD::FSUBS, X86ISD::FSUBS_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtdq2ph_128, TRUNCATE_TO_REG,
                     X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_128, TRUNCATE_TO_REG,
                     X86ISD::VFPROUND, X86ISD::VMFPROUND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_256, TRUNCATE_TO_REG,
                     X86ISD::VFPROUND, X86ISD::VMFPROUND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_512, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_256, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_512, INTR_TYPE_1OP_MASK_SAE,
                     ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_256, INTR_TYPE_1OP_MASK, ISD::FP_EXTEND, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_512, INTR_TYPE_1OP_MASK_SAE,
                     ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_512, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_512, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_512, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_512, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_512, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_128, TRUNCATE_TO_REG,
                     X86ISD::VFPROUND, X86ISD::VMFPROUND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_256, INTR_TYPE_1OP_MASK, X86ISD::VFPROUND, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_512, INTR_TYPE_1OP_MASK,
                     X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_128, TRUNCATE_TO_REG,
                     X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_256, TRUNCATE_TO_REG,
                     X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsd2sh_round, INTR_TYPE_SCALAR_MASK_RND,
                     X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2sd_round, INTR_TYPE_SCALAR_MASK_SAE,
                     X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2ss_round, INTR_TYPE_SCALAR_MASK_SAE,
                     X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtss2sh_round, INTR_TYPE_SCALAR_MASK_RND,
                     X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_512, INTR_TYPE_1OP_MASK_SAE,
                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_512, INTR_TYPE_1OP_MASK_SAE,
                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_512, INTR_TYPE_1OP_MASK_SAE,
                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_512, INTR_TYPE_1OP_MASK_SAE,
                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2UI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_512, INTR_TYPE_1OP_MASK_SAE,
                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_128, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_256, INTR_TYPE_1OP_MASK,
                     X86ISD::CVTTP2SI, 0),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_512, INTR_TYPE_1OP_MASK_SAE,
                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtudq2ph_128, TRUNCATE_TO_REG,
                     X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_128, TRUNCATE_TO_REG,
                     X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_256, TRUNCATE_TO_REG,
                     X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
  X86_INTRINSIC_DATA(avx512fp16_max_ph_128, INTR_TYPE_2OP, X86ISD::FMAX, 0),
  X86_INTRINSIC_DATA(avx512fp16_max_ph_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
  X86_INTRINSIC_DATA(avx512fp16_max_ph_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),

@@ -1015,6 +1126,23 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
  X86_INTRINSIC_DATA(avx512fp16_mul_ph_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
  X86_INTRINSIC_DATA(avx512fp16_sub_ph_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
  X86_INTRINSIC_DATA(avx512fp16_vcomi_sh, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
  /*fp16 scalar convert instruction*/
  X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
  X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
  X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
  X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
  X86_INTRINSIC_DATA(avx512fp16_vcvtsi2sh, INTR_TYPE_2OP,
                     X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
  X86_INTRINSIC_DATA(avx512fp16_vcvtsi642sh, INTR_TYPE_2OP,
                     X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
  X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
  X86_INTRINSIC_DATA(avx512fp16_vcvtusi2sh, INTR_TYPE_2OP,
                     X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
  X86_INTRINSIC_DATA(avx512fp16_vcvtusi642sh, INTR_TYPE_2OP,
                     X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
  X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
  X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
  X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),

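Each table row maps an IR intrinsic to its ISD opcode plus a rounding/SAE variant that is selected when the rounding immediate is not "current direction". A C-level sketch of a call that would take the rounding path, assuming the _mm512_cvt_roundpd_ph intrinsic name from the Intel AVX512-FP16 specification (not part of this diff):

#include <immintrin.h>

/* Hypothetical sketch: a non-default rounding immediate steers lowering to
 * the second opcode in the vcvtpd2ph_512 row above
 * (X86ISD::VFPROUND_RND instead of X86ISD::VFPROUND). */
__m128h truncate_toward_zero(__m512d v) {
  return _mm512_cvt_roundpd_ph(v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}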
@@ -282,3 +282,364 @@ define <32 x half> @test_int_x86_avx512fp16_maskz_max_ph_512_sae(<32 x half> %x1
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res1
}

declare <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half>, <8 x double>, i8, i32)

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_sae(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 8)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_nomask(<8 x half> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 -1, i32 4)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_load(<8 x half>* %px0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2pd (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <8 x half>, <8 x half>* %px0, align 16
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
  ret <8 x double> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_r(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph {rz-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_load(<8 x double>* %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phz (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <8 x double>, <8 x double>* %px0, align 64
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half>, <4 x float>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half>, <2 x double>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
  ret <8 x half> %res
}

declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float>, <8 x half>, <4 x float>, i8, i32)

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 8)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 -1, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> zeroinitializer, i8 %x2, i32 4)
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double>, <8 x half>, <2 x double>, i8, i32)

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 8)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 -1, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> zeroinitializer, i8 %x2, i32 4)
  ret <2 x double> %res
}

declare <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half>, <16 x float>, i16, i32)

define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512(<16 x half> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512r(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512r(<16 x half> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 8)
  ret <16 x float> %res
}

declare <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float>, <16 x half>, i16, i32)

define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> undef, i16 -1, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_512(<16 x float> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> zeroinitializer, i16 %x2, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512r(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx {rd-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtps2phx {ru-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 9)
  %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 -1, i32 10)
  %res2 = fadd <16 x half> %res, %res1
  ret <16 x half> %res2
}

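The scalar tests above drive the rounding-immediate values 4 (current direction), 8 ({sae}), and 11 ({rz-sae}). A C-level counterpart of the vcvtsd2sh {rz-sae} test, assuming the _mm_cvt_roundsd_sh intrinsic name from the Intel AVX512-FP16 specification (not part of this diff):

#include <immintrin.h>

/* Hypothetical sketch: convert the low double of b to half with
 * round-toward-zero and suppressed exceptions (immediate 11 above),
 * merging the upper half elements from a. */
__m128h sd2sh_rz(__m128h a, __m128d b) {
  return _mm_cvt_roundsd_sh(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}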
@@ -402,3 +402,403 @@ define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
  %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret <16 x half> %res0
}

declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <4 x double>, <4 x double>* %px0, align 32
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <2 x double>, <2 x double>* %px0, align 16
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
  ret <8 x float> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
  %res2 = fadd <8 x half> %res, %res1
  ret <8 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

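The VL intrinsics above take the mask as a plain i8 argument rather than going through a select, since the 128/256-bit forms have no rounding operand. A C-level sketch of the 256-bit masked extend, assuming the _mm256_mask_cvtph_pd intrinsic name from the Intel AVX512-FP16 specification (not part of this diff):

#include <immintrin.h>

/* Hypothetical sketch: convert 4 halves to 4 doubles, taking unselected
 * lanes from src, matching test_int_x86_avx512_mask_vcvt_ph2pd_256. */
__m256d ph2pd_masked(__m256d src, __mmask8 k, __m128h a) {
  return _mm256_mask_cvtph_pd(src, k, a);
}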
@@ -274,6 +274,68 @@ entry:
  ret <32 x i1> %0
}

define <8 x half> @regression_test1(<8 x half> %x, <8 x half> %y) #0 {
; CHECK-LABEL: regression_test1:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm2
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
; CHECK-NEXT:    retq
entry:
  %a = fsub <8 x half> %x, %y
  %b = fadd <8 x half> %x, %y
  %c = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x half> %c
}

define <8 x i16> @regression_test2(<8 x float> %x) #0 {
; CHECK-LABEL: regression_test2:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %a = fptoui <8 x float> %x to <8 x i16>
  ret <8 x i16> %a
}

define <8 x i16> @regression_test3(<8 x float> %x) #0 {
; CHECK-LABEL: regression_test3:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %a = fptosi <8 x float> %x to <8 x i16>
  ret <8 x i16> %a
}

define <8 x i16> @regression_test4(<8 x double> %x) #0 {
; CHECK-LABEL: regression_test4:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %a = fptoui <8 x double> %x to <8 x i16>
  ret <8 x i16> %a
}

define <8 x i16> @regression_test5(<8 x double> %x) #0 {
; CHECK-LABEL: regression_test5:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %a = fptosi <8 x double> %x to <8 x i16>
  ret <8 x i16> %a
}

define <8 x i1> @fcmp_v8f16(<8 x half> %a, <8 x half> %b)
; CHECK-LABEL: fcmp_v8f16:
; CHECK:       ## %bb.0: ## %entry

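The regression tests pin down that float/double vectors narrowed to i16 still legalize through a 32-bit integer step (vcvttps2udq/vcvttpd2dq plus vpmovdw) and do not get rerouted through an FP16 path. A plain-C shape of regression_test2, shown only as a sketch of the source pattern involved:

/* A vectorizer targeting avx512fp16 should still compile this loop via
 * vcvttps2udq + vpmovdw, as checked above. */
void f32_to_u16(const float *in, unsigned short *out) {
  for (int i = 0; i < 8; ++i)
    out[i] = (unsigned short)in[i];
}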
@ -0,0 +1,549 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unkown-unkown -mattr=+avx512bw -mattr=+avx512fp16 | FileCheck %s
|
||||
|
||||
declare <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16>, i32)
|
||||
|
||||
define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = sitofp <32 x i16> %arg0 to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b(i16* %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, i16* %arg0
  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2(i16* %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, i16* %arg0
  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
  %res0 = sitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = sitofp <32 x i16> %arg0 to <32 x half>
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = sitofp <32 x i16> %arg0 to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %val = load <32 x i16>, <32 x i16>* %arg0
  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %val = load <32 x i16>, <32 x i16>* %arg0
  %res0 = sitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half>, <32 x i16>, i32, i32)
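
; The mask.vcvtph2w-style intrinsics take (source, passthrough, mask, rounding
; immediate). The trailing immediate uses the usual X86 AVX512 rounding
; encoding: 4 = use the current MXCSR rounding mode, 8 = {sae}, 9 = {rd-sae},
; 10 = {ru-sae}, matching the CHECK lines in the tests below.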

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2w (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w {rd-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2w %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2w (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %val = load <32 x half>, <32 x half>* %arg0
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

declare <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16>, i32)
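
; This generic convert-with-rounding intrinsic has no mask operand; the tests
; below express masking as an explicit bitcast-to-<32 x i1> plus select, which
; the backend is expected to fold into {%k1} predication on vcvtuw2ph.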

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = uitofp <32 x i16> %arg0 to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b(i16* %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, i16* %arg0
  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2(i16* %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %scalar = load i16, i16* %arg0
  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
  %res0 = uitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2(<32 x i16> %arg0) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = uitofp <32 x i16> %arg0 to <32 x half>
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %res0 = uitofp <32 x i16> %arg0 to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %val = load <32 x i16>, <32 x i16>* %arg0
  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i32 %mask to <32 x i1>
  %val = load <32 x i16>, <32 x i16>* %arg0
  %res0 = uitofp <32 x i16> %val to <32 x half>
  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
  ret <32 x half> %res
}

declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half>, <32 x i16>, i32, i32)

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2uw (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw {rd-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2uw %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2uw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %val = load <32 x half>, <32 x half>* %arg0
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half>, <32 x i16>, i32, i32)
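
; The truncating conversions always round toward zero, so the last operand only
; chooses exception behavior: i32 4 uses the default environment and i32 8
; requests {sae}, as the _sae tests below verify.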

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2w (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_sae:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2w (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %val = load <32 x half>, <32 x half>* %arg0
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half>, <32 x i16>, i32, i32)

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2uw (%rdi){1to32}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_sae:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_z(<32 x half> %arg0, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
  ret <32 x i16> %res
}

define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2uw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %val = load <32 x half>, <32 x half>* %arg0
  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
  ret <32 x i16> %res
}

@ -0,0 +1,770 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
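
; This file covers the 256-bit and 128-bit (AVX512VL) forms of the conversions
; tested above at 512 bits: plain, broadcast {1toN}, nomask, zero-masking {z},
; and full-width load variants.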

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = sitofp <16 x i16> %arg0 to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_b(i16* %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i16 %mask to <16 x i1>
  %scalar = load i16, i16* %arg0
  %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
  %res0 = sitofp <16 x i16> %val to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = sitofp <16 x i16> %arg0 to <16 x half>
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = sitofp <16 x i16> %arg0 to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_load(<16 x i16>* %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x i16>, <16 x i16>* %arg0
  %res0 = sitofp <16 x i16> %val to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half>, <16 x i16>, i16)
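
; The VL (256/128-bit) intrinsics drop the rounding operand and keep only
; (source, passthrough, mask); embedded rounding controls are only encodable
; for the 512-bit forms.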

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2w (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2w %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_z(<16 x half> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2w (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %val = load <16 x half>, <16 x half>* %arg0
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = uitofp <16 x i16> %arg0 to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_b(i16* %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i16 %mask to <16 x i1>
  %scalar = load i16, i16* %arg0
  %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
  %res0 = uitofp <16 x i16> %val to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = uitofp <16 x i16> %arg0 to <16 x half>
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = uitofp <16 x i16> %arg0 to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_load(<16 x i16>* %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x i16>, <16 x i16>* %arg0
  %res0 = uitofp <16 x i16> %val to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half>, <16 x i16>, i16)

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2uw (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2uw %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_z(<16 x half> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2uw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %val = load <16 x half>, <16 x half>* %arg0
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half>, <16 x i16>, i16)

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2w (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2w %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_z(<16 x half> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2w (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %val = load <16 x half>, <16 x half>* %arg0
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half>, <16 x i16>, i16)

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2uw (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_z(<16 x half> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2uw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %val = load <16 x half>, <16 x half>* %arg0
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = sitofp <8 x i16> %arg0 to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_b(i16* %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i8 %mask to <8 x i1>
  %scalar = load i16, i16* %arg0
  %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
  %res0 = sitofp <8 x i16> %val to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = sitofp <8 x i16> %arg0 to <8 x half>
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = sitofp <8 x i16> %arg0 to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_load(<8 x i16>* %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtw2ph (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x i16>, <8 x i16>* %arg0
  %res0 = sitofp <8 x i16> %val to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half>, <8 x i16>, i8)

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2w (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2w %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_z(<8 x half> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2w %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2w (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %val = load <8 x half>, <8 x half>* %arg0
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = uitofp <8 x i16> %arg0 to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_b(i16* %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i8 %mask to <8 x i1>
  %scalar = load i16, i16* %arg0
  %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
  %res0 = uitofp <8 x i16> %val to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = uitofp <8 x i16> %arg0 to <8 x half>
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = uitofp <8 x i16> %arg0 to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_load(<8 x i16>* %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtuw2ph (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x i16>, <8 x i16>* %arg0
  %res0 = uitofp <8 x i16> %val to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half>, <8 x i16>, i8)

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2uw (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2uw %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_z(<8 x half> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uw %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtph2uw (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %val = load <8 x half>, <8 x half>* %arg0
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half>, <8 x i16>, i8)

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2w (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_z(<8 x half> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2w (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %val = load <8 x half>, <8 x half>* %arg0
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half>, <8 x i16>, i8)

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_b:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2uw (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %scalar = load half, half* %arg0
  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_z(<8 x half> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvttph2uw (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %val = load <8 x half>, <8 x half>* %arg0
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}
|
||||
|
||||
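; The following tests cover generic uitofp/sitofp on short vectors: i16
; elements convert directly via vcvtuw2ph/vcvtw2ph, and narrower element
; types (i8, i1) are first zero- or sign-extended to i16.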
define <4 x half> @test_u16tofp4(<4 x i16> %arg0) {
; CHECK-LABEL: test_u16tofp4:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = uitofp <4 x i16> %arg0 to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @test_s16tofp2(<2 x i16> %arg0) {
; CHECK-LABEL: test_s16tofp2:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = sitofp <2 x i16> %arg0 to <2 x half>
  ret <2 x half> %res
}

define <4 x half> @test_u8tofp4(<4 x i8> %arg0) {
; CHECK-LABEL: test_u8tofp4:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = uitofp <4 x i8> %arg0 to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @test_s8tofp2(<2 x i8> %arg0) {
; CHECK-LABEL: test_s8tofp2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = sitofp <2 x i8> %arg0 to <2 x half>
  ret <2 x half> %res
}

define <2 x half> @test_u1tofp2(<2 x i1> %arg0) {
; CHECK-LABEL: test_u1tofp2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = uitofp <2 x i1> %arg0 to <2 x half>
  ret <2 x half> %res
}

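; Odd integer widths are legalized before the convert: i17 is sign-extended
; in-register (vpslld/vpsrad by 15) and i33 is masked to its low 33 bits
; (vandps), so a regular 32- or 64-bit element conversion can be used.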
define <4 x half> @test_s17tofp4(<4 x i17> %arg0) {
; CHECK-LABEL: test_s17tofp4:
; CHECK: # %bb.0:
; CHECK-NEXT: vpslld $15, %xmm0, %xmm0
; CHECK-NEXT: vpsrad $15, %xmm0, %xmm0
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = sitofp <4 x i17> %arg0 to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
; CHECK-LABEL: test_u33tofp2:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = uitofp <2 x i33> %arg0 to <2 x half>
  ret <2 x half> %res
}

File diff suppressed because it is too large

@@ -187,3 +187,506 @@ define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i
  %res13 = and i8 %res11, %res12
  ret i8 %res13
}

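; The trailing i32 operand of the *.round intrinsics below encodes the
; rounding mode: 4 means the current MXCSR rounding, while 8/9/10/11 request
; the embedded {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae} forms.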
declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32)

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: sint_to_fp_16i32_to_16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT: retq
  %res = sitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}

declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32)

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: uint_to_fp_16i32_to_16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT: retq
  %res = uitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}

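; Each masked-conversion test below calls the intrinsic twice, once masked
; with {ru-sae} and once unmasked with {rn-sae}, then sums the results so
; both encodings stay live through to the final assembly.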
declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtph2dq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtph2udq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttph2dq {sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttph2udq {sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_r:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2qq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2qq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtph2qq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2uqq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2uqq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtph2uqq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_512:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2uqq {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttph2uqq %xmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

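; Scalar half<->integer tests: each pairs the default-rounding form with an
; explicit-rounding form ({rz-sae}, {ru-sae}, {rd-sae}, or plain {sae} for the
; truncating converts) and adds the results.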
declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2si %xmm0, %ecx
; CHECK-NEXT: vcvtsh2si {rz-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2si %xmm0, %rcx
; CHECK-NEXT: vcvtsh2si {ru-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsh2si %xmm0, %ecx
; CHECK-NEXT: vcvttsh2si {sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsh2si %xmm0, %rcx
; CHECK-NEXT: vcvttsh2si {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2usi %xmm0, %ecx
; CHECK-NEXT: vcvtsh2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2usi %xmm0, %rcx
; CHECK-NEXT: vcvtsh2usi {ru-sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsh2usi %xmm0, %ecx
; CHECK-NEXT: vcvttsh2usi {sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsh2usi %xmm0, %rcx
; CHECK-NEXT: vcvttsh2usi {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)

define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi2sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT: vcvtsi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)

define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi642sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT: vcvtsi2sh %rdi, {rn-sae}, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)

define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi2sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtusi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT: vcvtusi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)

define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi642sh:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT: vcvtusi2sh %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

@@ -23,3 +23,925 @@ entry:
  %0 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
  ret <2 x i64> %0
}

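; The 128/256-bit VL forms below carry no rounding operand, so masking is
; exercised either through the mask-taking intrinsics or through generic
; sitofp/uitofp combined with a select on the bitcast mask.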
define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = sitofp <8 x i32> %x0 to <8 x half>
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256_z(<8 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = sitofp <8 x i32> %x0 to <8 x half>
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

define <8 x half> @sint_to_fp_8i32_to_8f16(<8 x i32> %x) {
; CHECK-LABEL: sint_to_fp_8i32_to_8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = sitofp <8 x i32> %x to <8 x half>
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_z(<4 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <4 x half> @sint_to_fp_4i32_to_4f16(<4 x i32> %x) {
; CHECK-LABEL: sint_to_fp_4i32_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = sitofp <4 x i32> %x to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @sint_to_fp_2i32_to_2f16(<2 x i32> %x) {
; CHECK-LABEL: sint_to_fp_2i32_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = sitofp <2 x i32> %x to <2 x half>
  ret <2 x half> %res
}

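; Generic fptosi/fptoui on short half vectors is expected to select the
; truncating converts (vcvttph2dq, vcvttph2udq, vcvttph2w, vcvttph2uw).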
define <4 x i32> @fp_to_sint_4f16_to_4i32(<4 x half> %x) {
; CHECK-LABEL: fp_to_sint_4f16_to_4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = fptosi <4 x half> %x to <4 x i32>
  ret <4 x i32> %res
}

define <2 x i32> @fp_to_sint_2f16_to_2i32(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = fptosi <2 x half> %x to <2 x i32>
  ret <2 x i32> %res
}

define <2 x i16> @fp_to_sint_2f16_to_2i16(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = fptosi <2 x half> %x to <2 x i16>
  ret <2 x i16> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = uitofp <8 x i32> %x0 to <8 x half>
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256_z(<8 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = uitofp <8 x i32> %x0 to <8 x half>
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

define <8 x half> @uint_to_fp_8i32_to_8f16(<8 x i32> %x) {
; CHECK-LABEL: uint_to_fp_8i32_to_8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = uitofp <8 x i32> %x to <8 x half>
  ret <8 x half> %res
}

define <8 x i32> @fp_to_uint_8f16_to_8i32(<8 x half> %x) {
; CHECK-LABEL: fp_to_uint_8f16_to_8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
  %res = fptoui <8 x half> %x to <8 x i32>
  ret <8 x i32> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_z(<4 x i32> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <4 x half> @uint_to_fp_4i32_to_4f16(<4 x i32> %x) {
; CHECK-LABEL: uint_to_fp_4i32_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = uitofp <4 x i32> %x to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @uint_to_fp_2i32_to_2f16(<2 x i32> %x) {
; CHECK-LABEL: uint_to_fp_2i32_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = uitofp <2 x i32> %x to <2 x half>
  ret <2 x half> %res
}

define <4 x i32> @fp_to_uint_4f16_to_4i32(<4 x half> %x) {
; CHECK-LABEL: fp_to_uint_4f16_to_4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = fptoui <4 x half> %x to <4 x i32>
  ret <4 x i32> %res
}

define <2 x i32> @fp_to_uint_2f16_to_2i32(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = fptoui <2 x half> %x to <2 x i32>
  ret <2 x i32> %res
}

define <2 x i16> @fp_to_uint_2f16_to_2i16(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = fptoui <2 x half> %x to <2 x i16>
  ret <2 x i16> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

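; The destination of a double-to-half convert is always an xmm register, so
; the memory forms need an explicit size suffix: vcvtpd2phy reads a 256-bit
; source and vcvtpd2phx a 128-bit one, as the load tests below check.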
define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %x0 = load <4 x double>, <4 x double>* %px0, align 32
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128_load:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %x0 = load <2 x double>, <2 x double>* %px0, align 16
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_z(<4 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <4 x half> @sint_to_fp_4i64_to_4f16(<4 x i64> %x) {
; CHECK-LABEL: sint_to_fp_4i64_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = sitofp <4 x i64> %x to <4 x half>
  ret <4 x half> %res
}

define <4 x i64> @fp_to_sint_4f16_to_4i64(<4 x half> %x) {
; CHECK-LABEL: fp_to_sint_4f16_to_4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
; CHECK-NEXT: retq
  %res = fptosi <4 x half> %x to <4 x i64>
  ret <4 x i64> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_z(<2 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <2 x half> @sint_to_fp_2i64_to_2f16(<2 x i64> %x) {
; CHECK-LABEL: sint_to_fp_2i64_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = sitofp <2 x i64> %x to <2 x half>
  ret <2 x half> %res
}

define <2 x i64> @fp_to_sint_2f16_to_2i64(<2 x half> %x) {
; CHECK-LABEL: fp_to_sint_2f16_to_2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = fptosi <2 x half> %x to <2 x i64>
  ret <2 x i64> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_z(<4 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <4 x half> @uint_to_fp_4i64_to_4f16(<4 x i64> %x) {
; CHECK-LABEL: uint_to_fp_4i64_to_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %res = uitofp <4 x i64> %x to <4 x half>
  ret <4 x half> %res
}

define <4 x i64> @fp_to_uint_4f16_to_4i64(<4 x half> %x) {
; CHECK-LABEL: fp_to_uint_4f16_to_4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
; CHECK-NEXT: retq
  %res = fptoui <4 x half> %x to <4 x i64>
  ret <4 x i64> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_z(<2 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_z:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}

define <2 x half> @uint_to_fp_2i64_to_2f16(<2 x i64> %x) {
; CHECK-LABEL: uint_to_fp_2i64_to_2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = uitofp <2 x i64> %x to <2 x half>
  ret <2 x half> %res
}

define <2 x i64> @fp_to_uint_2f16_to_2i64(<2 x half> %x) {
; CHECK-LABEL: fp_to_uint_2f16_to_2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = fptoui <2 x half> %x to <2 x i64>
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_cvtt_ph2qq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1
|
||||
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: vmovaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_128(<8 x half> %x0, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1
|
||||
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half>, <4 x i64>, i8)
|
||||
|
||||
define <4 x i64> @test_int_x86_avx512_cvtt_ph2qq_256(<8 x half> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1
|
||||
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: vmovaps %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_256(<8 x half> %x0, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1
|
||||
; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half>, <2 x i64>, i8)
|
||||
|
||||
define <2 x i64> @test_int_x86_avx512_cvtt_ph2uqq_128(<8 x half> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1
|
||||
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: vmovaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_128(<8 x half> %x0, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1
|
||||
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half>, <4 x i64>, i8)
|
||||
|
||||
define <4 x i64> @test_int_x86_avx512_cvtt_ph2uqq_256(<8 x half> %x0) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1
|
||||
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: vmovaps %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
||||
define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_256(<8 x half> %x0, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1
|
||||
; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
|
|
|
@ -0,0 +1,157 @@
|
|||
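; The tests in this file compare scalar half conversions with and without
; AVX512FP16: the LIBCALL runs lower through the __gnu_f2h_ieee/__gnu_h2f_ieee
; and __trunc*hf2 soft-float helpers, while the FP16 runs select the new
; vcvtss2sh/vcvtsh2ss, vcvtsd2sh/vcvtsh2sd and vmovsh/vmovw instructions.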
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-avx512fp16 | FileCheck %s -check-prefix=LIBCALL
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512fp16 | FileCheck %s -check-prefix=FP16

define void @test1(float %src, i16* %dest) {
; LIBCALL-LABEL: test1:
; LIBCALL: # %bb.0:
; LIBCALL-NEXT: pushq %rbx
; LIBCALL-NEXT: .cfi_def_cfa_offset 16
; LIBCALL-NEXT: .cfi_offset %rbx, -16
; LIBCALL-NEXT: movq %rdi, %rbx
; LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
; LIBCALL-NEXT: movw %ax, (%rbx)
; LIBCALL-NEXT: popq %rbx
; LIBCALL-NEXT: .cfi_def_cfa_offset 8
; LIBCALL-NEXT: retq
;
; FP16-LABEL: test1:
; FP16: # %bb.0:
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; FP16-NEXT: vmovsh %xmm0, (%rdi)
; FP16-NEXT: retq
%1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
store i16 %1, i16* %dest, align 2
ret void
}

define float @test2(i16* nocapture %src) {
; LIBCALL-LABEL: test2:
; LIBCALL: # %bb.0:
; LIBCALL-NEXT: movzwl (%rdi), %edi
; LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
;
; FP16-LABEL: test2:
; FP16: # %bb.0:
; FP16-NEXT: vmovsh (%rdi), %xmm0
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
; FP16-NEXT: retq
%1 = load i16, i16* %src, align 2
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
ret float %2
}

define float @test3(float %src) nounwind uwtable readnone {
; LIBCALL-LABEL: test3:
; LIBCALL: # %bb.0:
; LIBCALL-NEXT: pushq %rax
; LIBCALL-NEXT: .cfi_def_cfa_offset 16
; LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
; LIBCALL-NEXT: movzwl %ax, %edi
; LIBCALL-NEXT: popq %rax
; LIBCALL-NEXT: .cfi_def_cfa_offset 8
; LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
;
; FP16-LABEL: test3:
; FP16: # %bb.0:
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
; FP16-NEXT: retq
%1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
ret float %2
}

; FIXME: Should it be __extendhfdf2?
define double @test4(i16* nocapture %src) {
; LIBCALL-LABEL: test4:
; LIBCALL: # %bb.0:
; LIBCALL-NEXT: pushq %rax
; LIBCALL-NEXT: .cfi_def_cfa_offset 16
; LIBCALL-NEXT: movzwl (%rdi), %edi
; LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
; LIBCALL-NEXT: popq %rax
; LIBCALL-NEXT: .cfi_def_cfa_offset 8
; LIBCALL-NEXT: retq
;
; FP16-LABEL: test4:
; FP16: # %bb.0:
; FP16-NEXT: vmovsh (%rdi), %xmm0
; FP16-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
; FP16-NEXT: retq
%1 = load i16, i16* %src, align 2
%2 = tail call double @llvm.convert.from.fp16.f64(i16 %1)
ret double %2
}

define i16 @test5(double %src) {
; LIBCALL-LABEL: test5:
; LIBCALL: # %bb.0:
; LIBCALL-NEXT: jmp __truncdfhf2@PLT # TAILCALL
;
; FP16-LABEL: test5:
; FP16: # %bb.0:
; FP16-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
; FP16-NEXT: vmovw %xmm0, %eax
; FP16-NEXT: # kill: def $ax killed $ax killed $eax
; FP16-NEXT: retq
%val = tail call i16 @llvm.convert.to.fp16.f64(double %src)
ret i16 %val
}

; FIXME: Should it be __extendhfxf2?
define x86_fp80 @test6(i16* nocapture %src) {
; LIBCALL-LABEL: test6:
; LIBCALL: # %bb.0:
; LIBCALL-NEXT: pushq %rax
; LIBCALL-NEXT: .cfi_def_cfa_offset 16
; LIBCALL-NEXT: movzwl (%rdi), %edi
; LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp)
; LIBCALL-NEXT: flds {{[0-9]+}}(%rsp)
; LIBCALL-NEXT: popq %rax
; LIBCALL-NEXT: .cfi_def_cfa_offset 8
; LIBCALL-NEXT: retq
;
; FP16-LABEL: test6:
; FP16: # %bb.0:
; FP16-NEXT: pushq %rax
; FP16-NEXT: .cfi_def_cfa_offset 16
; FP16-NEXT: vmovsh (%rdi), %xmm0
; FP16-NEXT: callq __extendhfxf2@PLT
; FP16-NEXT: popq %rax
; FP16-NEXT: .cfi_def_cfa_offset 8
; FP16-NEXT: retq
%1 = load i16, i16* %src, align 2
%2 = tail call x86_fp80 @llvm.convert.from.fp16.f80(i16 %1)
ret x86_fp80 %2
}

define i16 @test7(x86_fp80 %src) {
; LIBCALL-LABEL: test7:
; LIBCALL: # %bb.0:
; LIBCALL-NEXT: jmp __truncxfhf2@PLT # TAILCALL
;
; FP16-LABEL: test7:
; FP16: # %bb.0:
; FP16-NEXT: subq $24, %rsp
; FP16-NEXT: .cfi_def_cfa_offset 32
; FP16-NEXT: fldt {{[0-9]+}}(%rsp)
; FP16-NEXT: fstpt (%rsp)
; FP16-NEXT: callq __truncxfhf2@PLT
; FP16-NEXT: vmovw %xmm0, %eax
; FP16-NEXT: # kill: def $ax killed $ax killed $eax
; FP16-NEXT: addq $24, %rsp
; FP16-NEXT: .cfi_def_cfa_offset 8
; FP16-NEXT: retq
%val = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %src)
ret i16 %val
}

declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
declare x86_fp80 @llvm.convert.from.fp16.f80(i16) nounwind readnone
declare i16 @llvm.convert.to.fp16.f80(x86_fp80) nounwind readnone

@ -6,6 +6,10 @@ declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metad
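; The fpext/fptrunc declarations below are the strict (constrained-intrinsic)
; forms used by the new scalar half<->float/double conversion tests later in
; this file.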
declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata)
declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata)
declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)

define half @fadd_f16(half %a, half %b) nounwind strictfp {
; X86-LABEL: fadd_f16:

@ -75,4 +79,98 @@ define half @fdiv_f16(half %a, half %b) nounwind strictfp {
ret half %ret
}

define void @fpext_f16_to_f32(half* %val, float* %ret) nounwind strictfp {
; X86-LABEL: fpext_f16_to_f32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovsh (%ecx), %xmm0
; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: fpext_f16_to_f32:
; X64: # %bb.0:
; X64-NEXT: vmovsh (%rdi), %xmm0
; X64-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovss %xmm0, (%rsi)
; X64-NEXT: retq
%1 = load half, half* %val, align 4
%res = call float @llvm.experimental.constrained.fpext.f32.f16(half %1,
metadata !"fpexcept.strict") #0
store float %res, float* %ret, align 8
ret void
}

define void @fpext_f16_to_f64(half* %val, double* %ret) nounwind strictfp {
; X86-LABEL: fpext_f16_to_f64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovsh (%ecx), %xmm0
; X86-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: fpext_f16_to_f64:
; X64: # %bb.0:
; X64-NEXT: vmovsh (%rdi), %xmm0
; X64-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovsd %xmm0, (%rsi)
; X64-NEXT: retq
%1 = load half, half* %val, align 4
%res = call double @llvm.experimental.constrained.fpext.f64.f16(half %1,
metadata !"fpexcept.strict") #0
store double %res, double* %ret, align 8
ret void
}

define void @fptrunc_float_to_f16(float* %val, half *%ret) nounwind strictfp {
; X86-LABEL: fptrunc_float_to_f16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; X86-NEXT: vmovsh %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: fptrunc_float_to_f16:
; X64: # %bb.0:
; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovsh %xmm0, (%rsi)
; X64-NEXT: retq
%1 = load float, float* %val, align 8
%res = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
store half %res, half* %ret, align 4
ret void
}

define void @fptrunc_double_to_f16(double* %val, half *%ret) nounwind strictfp {
; X86-LABEL: fptrunc_double_to_f16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
; X86-NEXT: vmovsh %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: fptrunc_double_to_f16:
; X64: # %bb.0:
; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovsh %xmm0, (%rsi)
; X64-NEXT: retq
%1 = load double, double* %val, align 8
%res = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
store half %res, half* %ret, align 4
ret void
}

attributes #0 = { strictfp }

@ -0,0 +1,184 @@
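; This file covers strict fptosi/fptoui from half to i1/i8/i16/i32/i64.
; Results narrower than 32 bits come back in a GPR from vcvttsh2si or
; vcvttsh2usi and are then truncated; on the 32-bit target the i64 cases fall
; back to the vector vcvttph2qq/vcvttph2uqq forms.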
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

declare i1 @llvm.experimental.constrained.fptosi.i1.f16(half, metadata)
declare i8 @llvm.experimental.constrained.fptosi.i8.f16(half, metadata)
declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata)
declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
declare i1 @llvm.experimental.constrained.fptoui.i1.f16(half, metadata)
declare i8 @llvm.experimental.constrained.fptoui.i8.f16(half, metadata)
declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata)
declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)

define i1 @fptosi_f16toi1(half %x) #0 {
; X86-LABEL: fptosi_f16toi1:
; X86: # %bb.0:
; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: fptosi_f16toi1:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%result = call i1 @llvm.experimental.constrained.fptosi.i1.f16(half %x,
metadata !"fpexcept.strict") #0
ret i1 %result
}

define i8 @fptosi_f16toi8(half %x) #0 {
; X86-LABEL: fptosi_f16toi8:
; X86: # %bb.0:
; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: fptosi_f16toi8:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%result = call i8 @llvm.experimental.constrained.fptosi.i8.f16(half %x,
metadata !"fpexcept.strict") #0
ret i8 %result
}

define i16 @fptosi_f16toi16(half %x) #0 {
; X86-LABEL: fptosi_f16toi16:
; X86: # %bb.0:
; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: fptosi_f16toi16:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%result = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %x,
metadata !"fpexcept.strict") #0
ret i16 %result
}

define i32 @fptosi_f16toi32(half %x) #0 {
; X86-LABEL: fptosi_f16toi32:
; X86: # %bb.0:
; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: fptosi_f16toi32:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2si %xmm0, %eax
; X64-NEXT: retq
%result = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x,
metadata !"fpexcept.strict") #0
ret i32 %result
}

define i64 @fptosi_f16toi64(half %x) #0 {
; X86-LABEL: fptosi_f16toi64:
; X86: # %bb.0:
; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: vcvttph2qq %xmm0, %xmm0
; X86-NEXT: vmovd %xmm0, %eax
; X86-NEXT: vpextrd $1, %xmm0, %edx
; X86-NEXT: retl
;
; X64-LABEL: fptosi_f16toi64:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2si %xmm0, %rax
; X64-NEXT: retq
%result = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x,
metadata !"fpexcept.strict") #0
ret i64 %result
}

define i1 @fptoui_f16toi1(half %x) #0 {
; X86-LABEL: fptoui_f16toi1:
; X86: # %bb.0:
; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: fptoui_f16toi1:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%result = call i1 @llvm.experimental.constrained.fptoui.i1.f16(half %x,
metadata !"fpexcept.strict") #0
ret i1 %result
}

define i8 @fptoui_f16toi8(half %x) #0 {
; X86-LABEL: fptoui_f16toi8:
; X86: # %bb.0:
; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: fptoui_f16toi8:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%result = call i8 @llvm.experimental.constrained.fptoui.i8.f16(half %x,
metadata !"fpexcept.strict") #0
ret i8 %result
}

define i16 @fptoui_f16toi16(half %x) #0 {
; X86-LABEL: fptoui_f16toi16:
; X86: # %bb.0:
; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: fptoui_f16toi16:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%result = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %x,
metadata !"fpexcept.strict") #0
ret i16 %result
}

define i32 @fptoui_f16toi32(half %x) #0 {
; X86-LABEL: fptoui_f16toi32:
; X86: # %bb.0:
; X86-NEXT: vcvttsh2usi {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: fptoui_f16toi32:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2usi %xmm0, %eax
; X64-NEXT: retq
%result = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x,
metadata !"fpexcept.strict") #0
ret i32 %result
}

define i64 @fptoui_f16toi64(half %x) #0 {
; X86-LABEL: fptoui_f16toi64:
; X86: # %bb.0:
; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: vcvttph2uqq %xmm0, %xmm0
; X86-NEXT: vmovd %xmm0, %eax
; X86-NEXT: vpextrd $1, %xmm0, %edx
; X86-NEXT: retl
;
; X64-LABEL: fptoui_f16toi64:
; X64: # %bb.0:
; X64-NEXT: vcvttsh2usi %xmm0, %rax
; X64-NEXT: retq
%result = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x,
metadata !"fpexcept.strict") #0
ret i64 %result
}

attributes #0 = { strictfp }

@ -0,0 +1,197 @@
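; This file covers strict sitofp/uitofp from i1/i8/i16/i32/i64 to half.
; Inputs narrower than 32 bits are first sign- or zero-extended into a 32-bit
; GPR for vcvtsi2sh/vcvtusi2sh; on the 32-bit target the i64 cases use the
; vector vcvtqq2ph/vcvtuqq2ph forms instead.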
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)

define half @sitofp_i1tof16(i1 %x) #0 {
; X86-LABEL: sitofp_i1tof16:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: andb $1, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i1tof16:
; X64: # %bb.0:
; X64-NEXT: andb $1, %dil
; X64-NEXT: negb %dil
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @sitofp_i8tof16(i8 %x) #0 {
; X86-LABEL: sitofp_i8tof16:
; X86: # %bb.0:
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i8tof16:
; X64: # %bb.0:
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @sitofp_i16tof16(i16 %x) #0 {
; X86-LABEL: sitofp_i16tof16:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i16tof16:
; X64: # %bb.0:
; X64-NEXT: movswl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @sitofp_i32tof16(i32 %x) #0 {
; X86-LABEL: sitofp_i32tof16:
; X86: # %bb.0:
; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i32tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @sitofp_i64tof16(i64 %x) #0 {
; X86-LABEL: sitofp_i64tof16:
; X86: # %bb.0:
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vcvtqq2ph %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i64tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @uitofp_i1tof16(i1 %x) #0 {
; X86-LABEL: uitofp_i1tof16:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: andb $1, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i1tof16:
; X64: # %bb.0:
; X64-NEXT: andl $1, %edi
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @uitofp_i8tof16(i8 %x) #0 {
; X86-LABEL: uitofp_i8tof16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i8tof16:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @uitofp_i16tof16(i16 %x) #0 {
; X86-LABEL: uitofp_i16tof16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i16tof16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @uitofp_i32tof16(i32 %x) #0 {
; X86-LABEL: uitofp_i32tof16:
; X86: # %bb.0:
; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i32tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

define half @uitofp_i64tof16(i64 %x) #0 {
; X86-LABEL: uitofp_i64tof16:
; X86: # %bb.0:
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vcvtuqq2ph %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i64tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret half %result
}

attributes #0 = { strictfp }

@ -144,5 +144,21 @@ define <16 x half> @stack_fold_mulph_ymm(<16 x half> %a0, <16 x half> %a1) {
ret <16 x half> %2
}

define <8 x half> @stack_fold_subph(<8 x half> %a0, <8 x half> %a1) {
;CHECK-LABEL: stack_fold_subph
;CHECK: vsubph {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fsub <8 x half> %a0, %a1
ret <8 x half> %2
}

define <16 x half> @stack_fold_subph_ymm(<16 x half> %a0, <16 x half> %a1) {
;CHECK-LABEL: stack_fold_subph_ymm
;CHECK: vsubph {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fsub <16 x half> %a0, %a1
ret <16 x half> %2
}

attributes #0 = { "unsafe-fp-math"="false" }
attributes #1 = { "unsafe-fp-math"="true" }

@ -6,6 +6,16 @@ declare <8 x half> @llvm.experimental.constrained.fadd.v8f16(<8 x half>, <8 x ha
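; Strict 128-bit vector declarations: fptrunc/fpext between half and
; float/double, used by the f11..f22 conversion tests added below.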
declare <8 x half> @llvm.experimental.constrained.fsub.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fmul.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fdiv.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata)
declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata)
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata)

define <8 x half> @f2(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: f2:

@ -51,4 +61,130 @@ define <8 x half> @f8(<8 x half> %a, <8 x half> %b) #0 {
ret <8 x half> %ret
}

define <8 x half> @f11(<2 x double> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f11:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ext = extractelement <2 x double> %a0, i32 0
%cvt = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %ext,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = insertelement <8 x half> %a1, half %cvt, i32 0
ret <8 x half> %res
}

define <2 x double> @f12(<2 x double> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f12:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: ret{{[l|q]}}
%ext = extractelement <8 x half> %a1, i32 0
%cvt = call double @llvm.experimental.constrained.fpext.f64.f16(half %ext,
metadata !"fpexcept.strict") #0
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}

define <2 x double> @f15(<2 x half> %a) #0 {
; CHECK-LABEL: f15:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(
<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x double> %ret
}

define <2 x half> @f16(<2 x double> %a) #0 {
; CHECK-LABEL: f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(
<2 x double> %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x half> %ret
}

define <8 x half> @f17(<4 x float> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f17:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ext = extractelement <4 x float> %a0, i32 0
%cvt = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %ext,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
%res = insertelement <8 x half> %a1, half %cvt, i32 0
ret <8 x half> %res
}

define <4 x float> @f18(<4 x float> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f18:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: ret{{[l|q]}}
%ext = extractelement <8 x half> %a1, i32 0
%cvt = call float @llvm.experimental.constrained.fpext.f32.f16(half %ext,
metadata !"fpexcept.strict") #0
%res = insertelement <4 x float> %a0, float %cvt, i32 0
ret <4 x float> %res
}

define <2 x float> @f19(<2 x half> %a) #0 {
; CHECK-LABEL: f19:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(
<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x float> %ret
}

define <4 x float> @f20(<4 x half> %a) #0 {
; CHECK-LABEL: f20:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(
<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x float> %ret
}

define <2 x half> @f21(<2 x float> %a) #0 {
; CHECK-LABEL: f21:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(
<2 x float> %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x half> %ret
}

define <4 x half> @f22(<4 x float> %a) #0 {
; CHECK-LABEL: f22:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(
<4 x float> %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x half> %ret
}

attributes #0 = { strictfp }

@ -6,6 +6,10 @@ declare <16 x half> @llvm.experimental.constrained.fadd.v16f16(<16 x half>, <16
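; 256-bit counterparts of the strict conversion declarations; the f11..f15
; tests below exercise vcvtph2pd/vcvtpd2ph and vcvtph2psx/vcvtps2phx at
; ymm width.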
declare <16 x half> @llvm.experimental.constrained.fsub.v16f16(<16 x half>, <16 x half>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.fmul.v16f16(<16 x half>, <16 x half>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.fdiv.v16f16(<16 x half>, <16 x half>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(<4 x half>, metadata)
declare <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half>, metadata)
declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(<4 x double>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(<8 x float>, metadata, metadata)

define <16 x half> @f2(<16 x half> %a, <16 x half> %b) #0 {
; CHECK-LABEL: f2:

@ -51,4 +55,52 @@ define <16 x half> @f8(<16 x half> %a, <16 x half> %b) #0 {
ret <16 x half> %ret
}

define <4 x double> @f11(<4 x half> %a) #0 {
; CHECK-LABEL: f11:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(
<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x double> %ret
}

define <4 x half> @f12(<4 x double> %a) #0 {
; CHECK-LABEL: f12:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(
<4 x double> %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x half> %ret
}

define <8 x float> @f14(<8 x half> %a) #0 {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(
<8 x half> %a,
metadata !"fpexcept.strict") #0
ret <8 x float> %ret
}

define <8 x half> @f15(<8 x float> %a) #0 {
; CHECK-LABEL: f15:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(
<8 x float> %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <8 x half> %ret
}

attributes #0 = { strictfp }

@ -6,6 +6,10 @@ declare <32 x half> @llvm.experimental.constrained.fadd.v32f16(<32 x half>, <32
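; 512-bit counterparts of the strict conversion declarations; the f11..f15
; tests below exercise the zmm-width forms of the same conversions.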
declare <32 x half> @llvm.experimental.constrained.fsub.v32f16(<32 x half>, <32 x half>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.fmul.v32f16(<32 x half>, <32 x half>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.fdiv.v32f16(<32 x half>, <32 x half>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f16(<8 x half>, metadata)
declare <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(<16 x half>, metadata)
declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(<8 x double>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.fptrunc.v16f16.v16f32(<16 x float>, metadata, metadata)

define <32 x half> @f2(<32 x half> %a, <32 x half> %b) #0 {
; CHECK-LABEL: f2:

@ -51,4 +55,51 @@ define <32 x half> @f8(<32 x half> %a, <32 x half> %b) #0 {
ret <32 x half> %ret
}

define <8 x double> @f11(<8 x half> %a) #0 {
; CHECK-LABEL: f11:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2pd %xmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f16(
<8 x half> %a,
metadata !"fpexcept.strict") #0
ret <8 x double> %ret
}

define <8 x half> @f12(<8 x double> %a) #0 {
; CHECK-LABEL: f12:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtpd2ph %zmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(
<8 x double> %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <8 x half> %ret
}

define <16 x float> @f14(<16 x half> %a) #0 {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtph2psx %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(
<16 x half> %a,
metadata !"fpexcept.strict") #0
ret <16 x float> %ret
}

define <16 x half> @f15(<16 x float> %a) #0 {
; CHECK-LABEL: f15:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtps2phx %zmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <16 x half> @llvm.experimental.constrained.fptrunc.v16f16.v16f32(
<16 x float> %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <16 x half> %ret
}

attributes #0 = { strictfp }

@ -0,0 +1,323 @@
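; This file covers strict vector fptosi/fptoui from v2/v4/v8 half vectors.
; Sub-128-bit inputs are first padded with zeroes via vmovq or vxorps+vblendps,
; presumably so the unused upper lanes cannot raise spurious FP exceptions
; under the strictfp conversions.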
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK

declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half>, metadata)
declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half>, metadata)
declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half>, metadata)
declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half>, metadata)
declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half>, metadata)

define <2 x i64> @strict_vector_fptosi_v2f16_to_v2i64(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i64> %ret
}

define <2 x i64> @strict_vector_fptoui_v2f16_to_v2i64(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i64> %ret
}

define <2 x i32> @strict_vector_fptosi_v2f16_to_v2i32(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i32> %ret
}

define <2 x i32> @strict_vector_fptoui_v2f16_to_v2i32(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i32> %ret
}

define <2 x i16> @strict_vector_fptosi_v2f16_to_v2i16(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i16> %ret
}

define <2 x i16> @strict_vector_fptoui_v2f16_to_v2i16(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i16> %ret
}

define <2 x i8> @strict_vector_fptosi_v2f16_to_v2i8(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: vpmovwb %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i8> %ret
}

define <2 x i8> @strict_vector_fptoui_v2f16_to_v2i8(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: vpmovwb %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i8> %ret
}

define <2 x i1> @strict_vector_fptosi_v2f16_to_v2i1(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT: vpmovw2m %xmm0, %k0
; CHECK-NEXT: vpmovm2q %k0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i1> %ret
}

define <2 x i1> @strict_vector_fptoui_v2f16_to_v2i1(<2 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT: vpmovw2m %xmm0, %k0
; CHECK-NEXT: vpmovm2q %k0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half> %a,
metadata !"fpexcept.strict") #0
ret <2 x i1> %ret
}

define <4 x i32> @strict_vector_fptosi_v4f16_to_v4i32(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x i32> %ret
}

define <4 x i32> @strict_vector_fptoui_v4f16_to_v4i32(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x i32> %ret
}

define <4 x i16> @strict_vector_fptosi_v4f16_to_v4i16(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x i16> %ret
}

define <4 x i16> @strict_vector_fptoui_v4f16_to_v4i16(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x i16> %ret
}

define <4 x i8> @strict_vector_fptosi_v4f16_to_v4i8(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: vpmovwb %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x i8> %ret
}

define <4 x i8> @strict_vector_fptoui_v4f16_to_v4i8(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: vpmovwb %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x i8> %ret
}

define <4 x i1> @strict_vector_fptosi_v4f16_to_v4i1(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT: vpmovw2m %xmm0, %k0
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x i1> %ret
}

define <4 x i1> @strict_vector_fptoui_v4f16_to_v4i1(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT: vpmovw2m %xmm0, %k0
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half> %a,
metadata !"fpexcept.strict") #0
ret <4 x i1> %ret
}

define <8 x i16> @strict_vector_fptosi_v8f16_to_v8i16(<8 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half> %a,
metadata !"fpexcept.strict") #0
ret <8 x i16> %ret
}

define <8 x i16> @strict_vector_fptoui_v8f16_to_v8i16(<8 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half> %a,
metadata !"fpexcept.strict") #0
ret <8 x i16> %ret
}

define <8 x i8> @strict_vector_fptosi_v8f16_to_v8i8(<8 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovwb %xmm0, %xmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half> %a,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret <8 x i8> %ret
|
||||
}
|
||||
|
||||
define <8 x i8> @strict_vector_fptoui_v8f16_to_v8i8(<8 x half> %a) #0 {
|
||||
; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmovwb %xmm0, %xmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half> %a,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret <8 x i8> %ret
|
||||
}
|
||||
|
||||
define <8 x i1> @strict_vector_fptosi_v8f16_to_v8i1(<8 x half> %a) #0 {
|
||||
; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
|
||||
; CHECK-NEXT: vpmovd2m %ymm0, %k0
|
||||
; CHECK-NEXT: vpmovm2w %k0, %xmm0
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half> %a,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret <8 x i1> %ret
|
||||
}
|
||||
|
||||
define <8 x i1> @strict_vector_fptoui_v8f16_to_v8i1(<8 x half> %a) #0 {
|
||||
; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
|
||||
; CHECK-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vpmovd2m %ymm0, %k0
|
||||
; CHECK-NEXT: vpmovm2w %k0, %xmm0
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half> %a,
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret <8 x i1> %ret
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
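The pattern above is worth spelling out: for the v2 and v4 element counts, the strict lowering first places the value in a register whose unused lanes are zero (vmovq, or vxorps plus vblendps) before running the full-width vcvttph2w/vcvttph2uw, since zero lanes convert exactly and cannot raise a spurious FP exception under fpexcept.strict. A minimal standalone reproduction, assuming an llc built with AVX512FP16 support; the function name and RUN line here are illustrative only:

; RUN (illustrative): llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3
declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half>, metadata)

define <4 x i16> @demo_strict_v4f16_to_v4i16(<4 x half> %a) strictfp {
  ; Expect: vmovq zeroes lanes 4-7, then a single vcvttph2w over the full register.
  %r = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half> %a,
                                metadata !"fpexcept.strict") strictfp
  ret <4 x i16> %r
}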

@@ -0,0 +1,132 @@

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK

declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f16(<4 x half>, metadata)
declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f16(<4 x half>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f16(<8 x half>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f16(<8 x half>, metadata)
declare <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half>, metadata)
declare <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half>, metadata)
declare <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half>, metadata)
declare <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f16(<16 x half>, metadata)
declare <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f16(<16 x half>, metadata)
declare <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f16(<16 x half>, metadata)

define <4 x i64> @strict_vector_fptosi_v4f16_to_v4i64(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f16(<4 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <4 x i64> %ret
}

define <4 x i64> @strict_vector_fptoui_v4f16_to_v4i64(<4 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f16(<4 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <4 x i64> %ret
}

define <8 x i32> @strict_vector_fptosi_v8f16_to_v8i32(<8 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f16(<8 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i32> %ret
}

define <8 x i32> @strict_vector_fptoui_v8f16_to_v8i32(<8 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f16(<8 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i32> %ret
}

define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i16> %ret
}

define <16 x i16> @strict_vector_fptoui_v16f16_to_v16i16(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i16> %ret
}

define <16 x i8> @strict_vector_fptosi_v16f16_to_v16i8(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i8> %ret
}

define <16 x i8> @strict_vector_fptoui_v16f16_to_v16i8(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i8> %ret
}

define <16 x i1> @strict_vector_fptosi_v16f16_to_v16i1(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
; CHECK-NEXT:    vpmovd2m %zmm0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i1> %ret
}

define <16 x i1> @strict_vector_fptoui_v16f16_to_v16i1(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
; CHECK-NEXT:    vpslld $31, %zmm0, %zmm0
; CHECK-NEXT:    vpmovd2m %zmm0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i1> %ret
}

attributes #0 = { strictfp }
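This 256-bit file shows the other two shapes: widening conversions write a wider register directly (vcvttph2qq and vcvttph2dq take %xmm0 into %ymm0), and narrow integer results are produced by converting at dword width and truncating back with vpmovdw/vpmovdb. A minimal sketch of the v16i8 case, under the same assumptions as the sketch above:

declare <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half>, metadata)

define <16 x i8> @demo_strict_v16f16_to_v16i8(<16 x half> %a) strictfp {
  ; Expect: vcvttph2dq %ymm0 -> %zmm0, vpmovdb %zmm0 -> %xmm0, then vzeroupper.
  %r = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") strictfp
  ret <16 x i8> %r
}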

@@ -0,0 +1,124 @@

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK

declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f16(<8 x half>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f16(<8 x half>, metadata)
declare <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f16(<16 x half>, metadata)
declare <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f16(<16 x half>, metadata)
declare <32 x i16> @llvm.experimental.constrained.fptosi.v32i16.v32f16(<32 x half>, metadata)
declare <32 x i16> @llvm.experimental.constrained.fptoui.v32i16.v32f16(<32 x half>, metadata)
declare <32 x i8> @llvm.experimental.constrained.fptosi.v32i8.v32f16(<32 x half>, metadata)
declare <32 x i8> @llvm.experimental.constrained.fptoui.v32i8.v32f16(<32 x half>, metadata)
declare <32 x i1> @llvm.experimental.constrained.fptosi.v32i1.v32f16(<32 x half>, metadata)
declare <32 x i1> @llvm.experimental.constrained.fptoui.v32i1.v32f16(<32 x half>, metadata)

define <8 x i64> @strict_vector_fptosi_v8f16_to_v8i64(<8 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2qq %xmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f16(<8 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i64> %ret
}

define <8 x i64> @strict_vector_fptoui_v8f16_to_v8i64(<8 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uqq %xmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f16(<8 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i64> %ret
}

define <16 x i32> @strict_vector_fptosi_v16f16_to_v16i32(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i32> %ret
}

define <16 x i32> @strict_vector_fptoui_v16f16_to_v16i32(<16 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %ymm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f16(<16 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i32> %ret
}

define <32 x i16> @strict_vector_fptosi_v32f16_to_v32i16(<32 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <32 x i16> @llvm.experimental.constrained.fptosi.v32i16.v32f16(<32 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <32 x i16> %ret
}

define <32 x i16> @strict_vector_fptoui_v32f16_to_v32i16(<32 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uw %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <32 x i16> @llvm.experimental.constrained.fptoui.v32i16.v32f16(<32 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <32 x i16> %ret
}

define <32 x i8> @strict_vector_fptosi_v32f16_to_v32i8(<32 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
; CHECK-NEXT:    vpmovwb %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <32 x i8> @llvm.experimental.constrained.fptosi.v32i8.v32f16(<32 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <32 x i8> %ret
}

define <32 x i8> @strict_vector_fptoui_v32f16_to_v32i8(<32 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
; CHECK-NEXT:    vpmovwb %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <32 x i8> @llvm.experimental.constrained.fptoui.v32i8.v32f16(<32 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <32 x i8> %ret
}

define <32 x i1> @strict_vector_fptosi_v32f16_to_v32i1(<32 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <32 x i1> @llvm.experimental.constrained.fptosi.v32i1.v32f16(<32 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <32 x i1> %ret
}

define <32 x i1> @strict_vector_fptoui_v32f16_to_v32i1(<32 x half> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
; CHECK-NEXT:    vpsllw $15, %zmm0, %zmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <32 x i1> @llvm.experimental.constrained.fptoui.v32i1.v32f16(<32 x half> %a,
                                metadata !"fpexcept.strict") #0
  ret <32 x i1> %ret
}

attributes #0 = { strictfp }
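At 512 bits the word-sized cases map one-to-one onto vcvttph2w/vcvttph2uw with no fixup instructions at all. A sketch of the simplest case, same assumptions as above:

declare <32 x i16> @llvm.experimental.constrained.fptosi.v32i16.v32f16(<32 x half>, metadata)

define <32 x i16> @demo_strict_v32f16_to_v32i16(<32 x half> %a) strictfp {
  ; Expect a single vcvttph2w %zmm0, %zmm0.
  %r = call <32 x i16> @llvm.experimental.constrained.fptosi.v32i16.v32f16(<32 x half> %a,
                                metadata !"fpexcept.strict") strictfp
  ret <32 x i16> %r
}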

@@ -0,0 +1,137 @@

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X64

declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i1(<8 x i1>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i1(<8 x i1>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i8(<8 x i8>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i8(<8 x i8>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i16(<8 x i16>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i16(<8 x i16>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i32(<4 x i32>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i32(<4 x i32>, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.sitofp.v2f16.v2i64(<2 x i64>, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.uitofp.v2f16.v2i64(<2 x i64>, metadata, metadata)

define <4 x half> @sitofp_v4i32_v4f16(<4 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v4i32_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i32(<4 x i32> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <4 x half> %result
}

define <4 x half> @uitofp_v4i32_v4f16(<4 x i32> %x) #0 {
; CHECK-LABEL: uitofp_v4i32_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i32(<4 x i32> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <4 x half> %result
}

define <2 x half> @sitofp_v2i64_v2f16(<2 x i64> %x) #0 {
; CHECK-LABEL: sitofp_v2i64_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <2 x half> @llvm.experimental.constrained.sitofp.v2f16.v2i64(<2 x i64> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <2 x half> %result
}

define <2 x half> @uitofp_v2i64_v2f16(<2 x i64> %x) #0 {
; CHECK-LABEL: uitofp_v2i64_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <2 x half> @llvm.experimental.constrained.uitofp.v2f16.v2i64(<2 x i64> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <2 x half> %result
}

define <8 x half> @sitofp_v8i1_v8f16(<8 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v8i1_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpsraw $15, %xmm0, %xmm0
; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i1(<8 x i1> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

define <8 x half> @uitofp_v8i1_v8f16(<8 x i1> %x) #0 {
; X86-LABEL: uitofp_v8i1_v8f16:
; X86:       # %bb.0:
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vcvtuw2ph %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_v8i1_v8f16:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vcvtuw2ph %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i1(<8 x i1> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

define <8 x half> @sitofp_v8i8_v8f16(<8 x i8> %x) #0 {
; CHECK-LABEL: sitofp_v8i8_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i8(<8 x i8> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

define <8 x half> @uitofp_v8i8_v8f16(<8 x i8> %x) #0 {
; CHECK-LABEL: uitofp_v8i8_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i8(<8 x i8> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

define <8 x half> @sitofp_v8i16_v8f16(<8 x i16> %x) #0 {
; CHECK-LABEL: sitofp_v8i16_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i16(<8 x i16> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

define <8 x half> @uitofp_v8i16_v8f16(<8 x i16> %x) #0 {
; CHECK-LABEL: uitofp_v8i16_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i16(<8 x i16> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

attributes #0 = { strictfp }
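For the int-to-FP direction, vcvtw2ph/vcvtuw2ph consume word elements, so i8 and i1 sources are first widened in the integer domain: vpmovsxbw/vpmovzxbw for bytes, and for i1 either vpsllw plus vpsraw (signed) or a vandps/vpand against a constant-pool mask (unsigned). The unsigned i1 case is also why the X86 and X64 prefixes diverge above: the constant pool is addressed rip-relative only on x86-64. A sketch of the unsigned i8 case, same assumptions:

declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i8(<8 x i8>, metadata, metadata)

define <8 x half> @demo_uitofp_v8i8(<8 x i8> %x) strictfp {
  ; Expect: vpmovzxbw to widen the bytes to words, then vcvtuw2ph.
  %r = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i8(<8 x i8> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") strictfp
  ret <8 x half> %r
}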

@@ -0,0 +1,144 @@

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X64

declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i1(<16 x i1>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i1(<16 x i1>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i8(<16 x i8>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i8(<16 x i8>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i16(<16 x i16>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i16(<16 x i16>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i32(<8 x i32>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i64(<4 x i64>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i64(<4 x i64>, metadata, metadata)

define <16 x half> @sitofp_v16i1_v16f16(<16 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v16i1_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT:    vpsllw $15, %ymm0, %ymm0
; CHECK-NEXT:    vpsraw $15, %ymm0, %ymm0
; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i1(<16 x i1> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <16 x half> %result
}

define <16 x half> @uitofp_v16i1_v16f16(<16 x i1> %x) #0 {
; X86-LABEL: uitofp_v16i1_v16f16:
; X86:       # %bb.0:
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT:    vcvtuw2ph %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_v16i1_v16f16:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT:    vcvtuw2ph %ymm0, %ymm0
; X64-NEXT:    retq
  %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i1(<16 x i1> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <16 x half> %result
}

define <16 x half> @sitofp_v16i8_v16f16(<16 x i8> %x) #0 {
; CHECK-LABEL: sitofp_v16i8_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i8(<16 x i8> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <16 x half> %result
}

define <16 x half> @uitofp_v16i8_v16f16(<16 x i8> %x) #0 {
; CHECK-LABEL: uitofp_v16i8_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i8(<16 x i8> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <16 x half> %result
}

define <16 x half> @sitofp_v16i16_v16f16(<16 x i16> %x) #0 {
; CHECK-LABEL: sitofp_v16i16_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i16(<16 x i16> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <16 x half> %result
}

define <16 x half> @uitofp_v16i16_v16f16(<16 x i16> %x) #0 {
; CHECK-LABEL: uitofp_v16i16_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i16(<16 x i16> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <16 x half> %result
}

define <8 x half> @sitofp_v8i32_v8f16(<8 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v8i32_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

define <8 x half> @uitofp_v8i32_v8f16(<8 x i32> %x) #0 {
; CHECK-LABEL: uitofp_v8i32_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i32(<8 x i32> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

define <4 x half> @sitofp_v4i64_v4f16(<4 x i64> %x) #0 {
; CHECK-LABEL: sitofp_v4i64_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i64(<4 x i64> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <4 x half> %result
}

define <4 x half> @uitofp_v4i64_v4f16(<4 x i64> %x) #0 {
; CHECK-LABEL: uitofp_v4i64_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i64(<4 x i64> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <4 x half> %result
}

attributes #0 = { strictfp }
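Down-conversions from wider integers compress into a narrower register (vcvtdq2ph %ymm0, %xmm0 and vcvtqq2ph %ymm0, %xmm0), followed by vzeroupper since the wide source register is dead afterwards. A sketch, same assumptions:

declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32>, metadata, metadata)

define <8 x half> @demo_sitofp_v8i32(<8 x i32> %x) strictfp {
  ; Expect: vcvtdq2ph %ymm0, %xmm0 followed by vzeroupper.
  %r = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") strictfp
  ret <8 x half> %r
}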

@@ -0,0 +1,142 @@

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK,X64

declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i1(<32 x i1>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i1(<32 x i1>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i16(<32 x i16>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i16(<32 x i16>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i32(<16 x i32>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64>, metadata, metadata)

define <32 x half> @sitofp_v32i1_v32f16(<32 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v32i1_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; CHECK-NEXT:    vpsllw $15, %zmm0, %zmm0
; CHECK-NEXT:    vpsraw $15, %zmm0, %zmm0
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i1(<32 x i1> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <32 x half> %result
}

define <32 x half> @uitofp_v32i1_v32f16(<32 x i1> %x) #0 {
; X86-LABEL: uitofp_v32i1_v32f16:
; X86:       # %bb.0:
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; X86-NEXT:    vcvtuw2ph %zmm0, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_v32i1_v32f16:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; X64-NEXT:    vcvtuw2ph %zmm0, %zmm0
; X64-NEXT:    retq
  %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i1(<32 x i1> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <32 x half> %result
}

define <32 x half> @sitofp_v32i8_v32f16(<32 x i8> %x) #0 {
; CHECK-LABEL: sitofp_v32i8_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbw %ymm0, %zmm0
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <32 x half> %result
}

define <32 x half> @uitofp_v32i8_v32f16(<32 x i8> %x) #0 {
; CHECK-LABEL: uitofp_v32i8_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <32 x half> %result
}

define <32 x half> @sitofp_v32i16_v32f16(<32 x i16> %x) #0 {
; CHECK-LABEL: sitofp_v32i16_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i16(<32 x i16> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <32 x half> %result
}

define <32 x half> @uitofp_v32i16_v32f16(<32 x i16> %x) #0 {
; CHECK-LABEL: uitofp_v32i16_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i16(<32 x i16> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <32 x half> %result
}

define <16 x half> @sitofp_v16i32_v16f16(<16 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v16i32_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <16 x half> %result
}

define <16 x half> @uitofp_v16i32_v16f16(<16 x i32> %x) #0 {
; CHECK-LABEL: uitofp_v16i32_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i32(<16 x i32> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <16 x half> %result
}

define <8 x half> @sitofp_v8i64_v8f16(<8 x i64> %x) #0 {
; CHECK-LABEL: sitofp_v8i64_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

define <8 x half> @uitofp_v8i64_v8f16(<8 x i64> %x) #0 {
; CHECK-LABEL: uitofp_v8i64_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
  ret <8 x half> %result
}

attributes #0 = { strictfp }
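The 512-bit int-to-FP forms follow the same pattern one step up: byte and i1 sources widen into zmm words before vcvtw2ph/vcvtuw2ph, and wider integers compress (vcvtdq2ph %zmm0, %ymm0, vcvtqq2ph %zmm0, %xmm0). A sketch, same assumptions:

declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32>, metadata, metadata)

define <16 x half> @demo_sitofp_v16i32(<16 x i32> %x) strictfp {
  ; Expect a single vcvtdq2ph %zmm0, %ymm0.
  %r = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32> %x,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") strictfp
  ret <16 x half> %r
}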

@@ -460,3 +460,899 @@

# ATT:   vucomish -256(%rdx), %xmm30
# INTEL: vucomish xmm30, word ptr [rdx - 256]
0x62,0x65,0x7c,0x08,0x2e,0x72,0x80

# ATT:   vcvtdq2ph %zmm29, %ymm30
# INTEL: vcvtdq2ph ymm30, zmm29
0x62,0x05,0x7c,0x48,0x5b,0xf5

# ATT:   vcvtdq2ph {rn-sae}, %zmm29, %ymm30
# INTEL: vcvtdq2ph ymm30, zmm29, {rn-sae}
0x62,0x05,0x7c,0x18,0x5b,0xf5

# ATT:   vcvtdq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
# INTEL: vcvtdq2ph ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7c,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtdq2ph (%r9){1to16}, %ymm30
# INTEL: vcvtdq2ph ymm30, dword ptr [r9]{1to16}
0x62,0x45,0x7c,0x58,0x5b,0x31

# ATT:   vcvtdq2ph 8128(%rcx), %ymm30
# INTEL: vcvtdq2ph ymm30, zmmword ptr [rcx + 8128]
0x62,0x65,0x7c,0x48,0x5b,0x71,0x7f

# ATT:   vcvtdq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
# INTEL: vcvtdq2ph ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
0x62,0x65,0x7c,0xdf,0x5b,0x72,0x80

# ATT:   vcvtpd2ph %zmm29, %xmm30
# INTEL: vcvtpd2ph xmm30, zmm29
0x62,0x05,0xfd,0x48,0x5a,0xf5

# ATT:   vcvtpd2ph {rn-sae}, %zmm29, %xmm30
# INTEL: vcvtpd2ph xmm30, zmm29, {rn-sae}
0x62,0x05,0xfd,0x18,0x5a,0xf5

# ATT:   vcvtpd2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
# INTEL: vcvtpd2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0xfd,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtpd2ph (%r9){1to8}, %xmm30
# INTEL: vcvtpd2ph xmm30, qword ptr [r9]{1to8}
0x62,0x45,0xfd,0x58,0x5a,0x31

# ATT:   vcvtpd2phz 8128(%rcx), %xmm30
# INTEL: vcvtpd2ph xmm30, zmmword ptr [rcx + 8128]
0x62,0x65,0xfd,0x48,0x5a,0x71,0x7f

# ATT:   vcvtpd2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
# INTEL: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
0x62,0x65,0xfd,0xdf,0x5a,0x72,0x80
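A note on the mnemonics above: with a 512-bit memory source and only an xmm destination, a plain ATT vcvtpd2ph would leave the access width ambiguous, so the disassembler prints the z-suffixed form vcvtpd2phz for full zmmword loads, while broadcast forms stay unsuffixed because {1to8} already pins the element size (vcvtqq2phz below follows the same convention). These byte patterns can be replayed in isolation; a minimal sketch, assuming an llvm-mc build with AVX512FP16 support:

# echo "0x62,0x65,0xfd,0x48,0x5a,0x71,0x7f" | llvm-mc --triple=x86_64 --disassemble
# per the pair above, this should print: vcvtpd2phz 8128(%rcx), %xmm30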

# ATT:   vcvtph2dq %ymm29, %zmm30
# INTEL: vcvtph2dq zmm30, ymm29
0x62,0x05,0x7d,0x48,0x5b,0xf5

# ATT:   vcvtph2dq {rn-sae}, %ymm29, %zmm30
# INTEL: vcvtph2dq zmm30, ymm29, {rn-sae}
0x62,0x05,0x7d,0x18,0x5b,0xf5

# ATT:   vcvtph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvtph2dq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7d,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtph2dq (%r9){1to16}, %zmm30
# INTEL: vcvtph2dq zmm30, word ptr [r9]{1to16}
0x62,0x45,0x7d,0x58,0x5b,0x31

# ATT:   vcvtph2dq 4064(%rcx), %zmm30
# INTEL: vcvtph2dq zmm30, ymmword ptr [rcx + 4064]
0x62,0x65,0x7d,0x48,0x5b,0x71,0x7f

# ATT:   vcvtph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
# INTEL: vcvtph2dq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
0x62,0x65,0x7d,0xdf,0x5b,0x72,0x80

# ATT:   vcvtph2pd %xmm29, %zmm30
# INTEL: vcvtph2pd zmm30, xmm29
0x62,0x05,0x7c,0x48,0x5a,0xf5

# ATT:   vcvtph2pd {sae}, %xmm29, %zmm30
# INTEL: vcvtph2pd zmm30, xmm29, {sae}
0x62,0x05,0x7c,0x18,0x5a,0xf5

# ATT:   vcvtph2pd 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvtph2pd zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7c,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtph2pd (%r9){1to8}, %zmm30
# INTEL: vcvtph2pd zmm30, word ptr [r9]{1to8}
0x62,0x45,0x7c,0x58,0x5a,0x31

# ATT:   vcvtph2pd 2032(%rcx), %zmm30
# INTEL: vcvtph2pd zmm30, xmmword ptr [rcx + 2032]
0x62,0x65,0x7c,0x48,0x5a,0x71,0x7f

# ATT:   vcvtph2pd -256(%rdx){1to8}, %zmm30 {%k7} {z}
# INTEL: vcvtph2pd zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
0x62,0x65,0x7c,0xdf,0x5a,0x72,0x80

# ATT:   vcvtph2psx %ymm29, %zmm30
# INTEL: vcvtph2psx zmm30, ymm29
0x62,0x06,0x7d,0x48,0x13,0xf5

# ATT:   vcvtph2psx {sae}, %ymm29, %zmm30
# INTEL: vcvtph2psx zmm30, ymm29, {sae}
0x62,0x06,0x7d,0x18,0x13,0xf5

# ATT:   vcvtph2psx 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvtph2psx zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
0x62,0x26,0x7d,0x4f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtph2psx (%r9){1to16}, %zmm30
# INTEL: vcvtph2psx zmm30, word ptr [r9]{1to16}
0x62,0x46,0x7d,0x58,0x13,0x31

# ATT:   vcvtph2psx 4064(%rcx), %zmm30
# INTEL: vcvtph2psx zmm30, ymmword ptr [rcx + 4064]
0x62,0x66,0x7d,0x48,0x13,0x71,0x7f

# ATT:   vcvtph2psx -256(%rdx){1to16}, %zmm30 {%k7} {z}
# INTEL: vcvtph2psx zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
0x62,0x66,0x7d,0xdf,0x13,0x72,0x80

# ATT:   vcvtph2qq %xmm29, %zmm30
# INTEL: vcvtph2qq zmm30, xmm29
0x62,0x05,0x7d,0x48,0x7b,0xf5

# ATT:   vcvtph2qq {rn-sae}, %xmm29, %zmm30
# INTEL: vcvtph2qq zmm30, xmm29, {rn-sae}
0x62,0x05,0x7d,0x18,0x7b,0xf5

# ATT:   vcvtph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvtph2qq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7d,0x4f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtph2qq (%r9){1to8}, %zmm30
# INTEL: vcvtph2qq zmm30, word ptr [r9]{1to8}
0x62,0x45,0x7d,0x58,0x7b,0x31

# ATT:   vcvtph2qq 2032(%rcx), %zmm30
# INTEL: vcvtph2qq zmm30, xmmword ptr [rcx + 2032]
0x62,0x65,0x7d,0x48,0x7b,0x71,0x7f

# ATT:   vcvtph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
# INTEL: vcvtph2qq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
0x62,0x65,0x7d,0xdf,0x7b,0x72,0x80

# ATT:   vcvtph2udq %ymm29, %zmm30
# INTEL: vcvtph2udq zmm30, ymm29
0x62,0x05,0x7c,0x48,0x79,0xf5

# ATT:   vcvtph2udq {rn-sae}, %ymm29, %zmm30
# INTEL: vcvtph2udq zmm30, ymm29, {rn-sae}
0x62,0x05,0x7c,0x18,0x79,0xf5

# ATT:   vcvtph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvtph2udq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7c,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtph2udq (%r9){1to16}, %zmm30
# INTEL: vcvtph2udq zmm30, word ptr [r9]{1to16}
0x62,0x45,0x7c,0x58,0x79,0x31

# ATT:   vcvtph2udq 4064(%rcx), %zmm30
# INTEL: vcvtph2udq zmm30, ymmword ptr [rcx + 4064]
0x62,0x65,0x7c,0x48,0x79,0x71,0x7f

# ATT:   vcvtph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
# INTEL: vcvtph2udq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
0x62,0x65,0x7c,0xdf,0x79,0x72,0x80

# ATT:   vcvtph2uqq %xmm29, %zmm30
# INTEL: vcvtph2uqq zmm30, xmm29
0x62,0x05,0x7d,0x48,0x79,0xf5

# ATT:   vcvtph2uqq {rn-sae}, %xmm29, %zmm30
# INTEL: vcvtph2uqq zmm30, xmm29, {rn-sae}
0x62,0x05,0x7d,0x18,0x79,0xf5

# ATT:   vcvtph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvtph2uqq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7d,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtph2uqq (%r9){1to8}, %zmm30
# INTEL: vcvtph2uqq zmm30, word ptr [r9]{1to8}
0x62,0x45,0x7d,0x58,0x79,0x31

# ATT:   vcvtph2uqq 2032(%rcx), %zmm30
# INTEL: vcvtph2uqq zmm30, xmmword ptr [rcx + 2032]
0x62,0x65,0x7d,0x48,0x79,0x71,0x7f

# ATT:   vcvtph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
# INTEL: vcvtph2uqq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
0x62,0x65,0x7d,0xdf,0x79,0x72,0x80

# ATT:   vcvtph2uw %zmm29, %zmm30
# INTEL: vcvtph2uw zmm30, zmm29
0x62,0x05,0x7c,0x48,0x7d,0xf5

# ATT:   vcvtph2uw {rn-sae}, %zmm29, %zmm30
# INTEL: vcvtph2uw zmm30, zmm29, {rn-sae}
0x62,0x05,0x7c,0x18,0x7d,0xf5

# ATT:   vcvtph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvtph2uw zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7c,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtph2uw (%r9){1to32}, %zmm30
# INTEL: vcvtph2uw zmm30, word ptr [r9]{1to32}
0x62,0x45,0x7c,0x58,0x7d,0x31

# ATT:   vcvtph2uw 8128(%rcx), %zmm30
# INTEL: vcvtph2uw zmm30, zmmword ptr [rcx + 8128]
0x62,0x65,0x7c,0x48,0x7d,0x71,0x7f

# ATT:   vcvtph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
# INTEL: vcvtph2uw zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
0x62,0x65,0x7c,0xdf,0x7d,0x72,0x80

# ATT:   vcvtph2w %zmm29, %zmm30
# INTEL: vcvtph2w zmm30, zmm29
0x62,0x05,0x7d,0x48,0x7d,0xf5

# ATT:   vcvtph2w {rn-sae}, %zmm29, %zmm30
# INTEL: vcvtph2w zmm30, zmm29, {rn-sae}
0x62,0x05,0x7d,0x18,0x7d,0xf5

# ATT:   vcvtph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvtph2w zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7d,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtph2w (%r9){1to32}, %zmm30
# INTEL: vcvtph2w zmm30, word ptr [r9]{1to32}
0x62,0x45,0x7d,0x58,0x7d,0x31

# ATT:   vcvtph2w 8128(%rcx), %zmm30
# INTEL: vcvtph2w zmm30, zmmword ptr [rcx + 8128]
0x62,0x65,0x7d,0x48,0x7d,0x71,0x7f

# ATT:   vcvtph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
# INTEL: vcvtph2w zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
0x62,0x65,0x7d,0xdf,0x7d,0x72,0x80

# ATT:   vcvtps2phx %zmm29, %ymm30
# INTEL: vcvtps2phx ymm30, zmm29
0x62,0x05,0x7d,0x48,0x1d,0xf5

# ATT:   vcvtps2phx {rn-sae}, %zmm29, %ymm30
# INTEL: vcvtps2phx ymm30, zmm29, {rn-sae}
0x62,0x05,0x7d,0x18,0x1d,0xf5

# ATT:   vcvtps2phx 268435456(%rbp,%r14,8), %ymm30 {%k7}
# INTEL: vcvtps2phx ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7d,0x4f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtps2phx (%r9){1to16}, %ymm30
# INTEL: vcvtps2phx ymm30, dword ptr [r9]{1to16}
0x62,0x45,0x7d,0x58,0x1d,0x31

# ATT:   vcvtps2phx 8128(%rcx), %ymm30
# INTEL: vcvtps2phx ymm30, zmmword ptr [rcx + 8128]
0x62,0x65,0x7d,0x48,0x1d,0x71,0x7f

# ATT:   vcvtps2phx -512(%rdx){1to16}, %ymm30 {%k7} {z}
# INTEL: vcvtps2phx ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
0x62,0x65,0x7d,0xdf,0x1d,0x72,0x80

# ATT:   vcvtqq2ph %zmm29, %xmm30
# INTEL: vcvtqq2ph xmm30, zmm29
0x62,0x05,0xfc,0x48,0x5b,0xf5

# ATT:   vcvtqq2ph {rn-sae}, %zmm29, %xmm30
# INTEL: vcvtqq2ph xmm30, zmm29, {rn-sae}
0x62,0x05,0xfc,0x18,0x5b,0xf5

# ATT:   vcvtqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
# INTEL: vcvtqq2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0xfc,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtqq2ph (%r9){1to8}, %xmm30
# INTEL: vcvtqq2ph xmm30, qword ptr [r9]{1to8}
0x62,0x45,0xfc,0x58,0x5b,0x31

# ATT:   vcvtqq2phz 8128(%rcx), %xmm30
# INTEL: vcvtqq2ph xmm30, zmmword ptr [rcx + 8128]
0x62,0x65,0xfc,0x48,0x5b,0x71,0x7f

# ATT:   vcvtqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
# INTEL: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
0x62,0x65,0xfc,0xdf,0x5b,0x72,0x80

# ATT:   vcvtsd2sh %xmm28, %xmm29, %xmm30
# INTEL: vcvtsd2sh xmm30, xmm29, xmm28
0x62,0x05,0x97,0x00,0x5a,0xf4

# ATT:   vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30
# INTEL: vcvtsd2sh xmm30, xmm29, xmm28, {rn-sae}
0x62,0x05,0x97,0x10,0x5a,0xf4

# ATT:   vcvtsd2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
# INTEL: vcvtsd2sh xmm30 {k7}, xmm29, qword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x97,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtsd2sh (%r9), %xmm29, %xmm30
# INTEL: vcvtsd2sh xmm30, xmm29, qword ptr [r9]
0x62,0x45,0x97,0x00,0x5a,0x31

# ATT:   vcvtsd2sh 1016(%rcx), %xmm29, %xmm30
# INTEL: vcvtsd2sh xmm30, xmm29, qword ptr [rcx + 1016]
0x62,0x65,0x97,0x00,0x5a,0x71,0x7f

# ATT:   vcvtsd2sh -1024(%rdx), %xmm29, %xmm30 {%k7} {z}
# INTEL: vcvtsd2sh xmm30 {k7} {z}, xmm29, qword ptr [rdx - 1024]
0x62,0x65,0x97,0x87,0x5a,0x72,0x80

# ATT:   vcvtsh2sd %xmm28, %xmm29, %xmm30
# INTEL: vcvtsh2sd xmm30, xmm29, xmm28
0x62,0x05,0x16,0x00,0x5a,0xf4

# ATT:   vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30
# INTEL: vcvtsh2sd xmm30, xmm29, xmm28, {sae}
0x62,0x05,0x16,0x10,0x5a,0xf4

# ATT:   vcvtsh2sd 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
# INTEL: vcvtsh2sd xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x16,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtsh2sd (%r9), %xmm29, %xmm30
# INTEL: vcvtsh2sd xmm30, xmm29, word ptr [r9]
0x62,0x45,0x16,0x00,0x5a,0x31

# ATT:   vcvtsh2sd 254(%rcx), %xmm29, %xmm30
# INTEL: vcvtsh2sd xmm30, xmm29, word ptr [rcx + 254]
0x62,0x65,0x16,0x00,0x5a,0x71,0x7f

# ATT:   vcvtsh2sd -256(%rdx), %xmm29, %xmm30 {%k7} {z}
# INTEL: vcvtsh2sd xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
0x62,0x65,0x16,0x87,0x5a,0x72,0x80

# ATT:   vcvtsh2si %xmm30, %edx
# INTEL: vcvtsh2si edx, xmm30
0x62,0x95,0x7e,0x08,0x2d,0xd6

# ATT:   vcvtsh2si {rn-sae}, %xmm30, %edx
# INTEL: vcvtsh2si edx, xmm30, {rn-sae}
0x62,0x95,0x7e,0x18,0x2d,0xd6

# ATT:   vcvtsh2si %xmm30, %r12
# INTEL: vcvtsh2si r12, xmm30
0x62,0x15,0xfe,0x08,0x2d,0xe6

# ATT:   vcvtsh2si {rn-sae}, %xmm30, %r12
# INTEL: vcvtsh2si r12, xmm30, {rn-sae}
0x62,0x15,0xfe,0x18,0x2d,0xe6

# ATT:   vcvtsh2si 268435456(%rbp,%r14,8), %edx
# INTEL: vcvtsh2si edx, word ptr [rbp + 8*r14 + 268435456]
0x62,0xb5,0x7e,0x08,0x2d,0x94,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtsh2si (%r9), %edx
# INTEL: vcvtsh2si edx, word ptr [r9]
0x62,0xd5,0x7e,0x08,0x2d,0x11

# ATT:   vcvtsh2si 254(%rcx), %edx
# INTEL: vcvtsh2si edx, word ptr [rcx + 254]
0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f

# ATT:   vcvtsh2si -256(%rdx), %edx
# INTEL: vcvtsh2si edx, word ptr [rdx - 256]
0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80

# ATT:   vcvtsh2si 268435456(%rbp,%r14,8), %r12
# INTEL: vcvtsh2si r12, word ptr [rbp + 8*r14 + 268435456]
0x62,0x35,0xfe,0x08,0x2d,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtsh2si (%r9), %r12
# INTEL: vcvtsh2si r12, word ptr [r9]
0x62,0x55,0xfe,0x08,0x2d,0x21

# ATT:   vcvtsh2si 254(%rcx), %r12
# INTEL: vcvtsh2si r12, word ptr [rcx + 254]
0x62,0x75,0xfe,0x08,0x2d,0x61,0x7f

# ATT:   vcvtsh2si -256(%rdx), %r12
# INTEL: vcvtsh2si r12, word ptr [rdx - 256]
0x62,0x75,0xfe,0x08,0x2d,0x62,0x80

# ATT:   vcvtsh2ss %xmm28, %xmm29, %xmm30
# INTEL: vcvtsh2ss xmm30, xmm29, xmm28
0x62,0x06,0x14,0x00,0x13,0xf4

# ATT:   vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30
# INTEL: vcvtsh2ss xmm30, xmm29, xmm28, {sae}
0x62,0x06,0x14,0x10,0x13,0xf4

# ATT:   vcvtsh2ss 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
# INTEL: vcvtsh2ss xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
0x62,0x26,0x14,0x07,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtsh2ss (%r9), %xmm29, %xmm30
# INTEL: vcvtsh2ss xmm30, xmm29, word ptr [r9]
0x62,0x46,0x14,0x00,0x13,0x31

# ATT:   vcvtsh2ss 254(%rcx), %xmm29, %xmm30
# INTEL: vcvtsh2ss xmm30, xmm29, word ptr [rcx + 254]
0x62,0x66,0x14,0x00,0x13,0x71,0x7f

# ATT:   vcvtsh2ss -256(%rdx), %xmm29, %xmm30 {%k7} {z}
# INTEL: vcvtsh2ss xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
0x62,0x66,0x14,0x87,0x13,0x72,0x80

# ATT:   vcvtsh2usi %xmm30, %edx
# INTEL: vcvtsh2usi edx, xmm30
0x62,0x95,0x7e,0x08,0x79,0xd6

# ATT:   vcvtsh2usi {rn-sae}, %xmm30, %edx
# INTEL: vcvtsh2usi edx, xmm30, {rn-sae}
0x62,0x95,0x7e,0x18,0x79,0xd6

# ATT:   vcvtsh2usi %xmm30, %r12
# INTEL: vcvtsh2usi r12, xmm30
0x62,0x15,0xfe,0x08,0x79,0xe6

# ATT:   vcvtsh2usi {rn-sae}, %xmm30, %r12
# INTEL: vcvtsh2usi r12, xmm30, {rn-sae}
0x62,0x15,0xfe,0x18,0x79,0xe6

# ATT:   vcvtsh2usi 268435456(%rbp,%r14,8), %edx
# INTEL: vcvtsh2usi edx, word ptr [rbp + 8*r14 + 268435456]
0x62,0xb5,0x7e,0x08,0x79,0x94,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtsh2usi (%r9), %edx
# INTEL: vcvtsh2usi edx, word ptr [r9]
0x62,0xd5,0x7e,0x08,0x79,0x11

# ATT:   vcvtsh2usi 254(%rcx), %edx
# INTEL: vcvtsh2usi edx, word ptr [rcx + 254]
0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f

# ATT:   vcvtsh2usi -256(%rdx), %edx
# INTEL: vcvtsh2usi edx, word ptr [rdx - 256]
0x62,0xf5,0x7e,0x08,0x79,0x52,0x80

# ATT:   vcvtsh2usi 268435456(%rbp,%r14,8), %r12
# INTEL: vcvtsh2usi r12, word ptr [rbp + 8*r14 + 268435456]
0x62,0x35,0xfe,0x08,0x79,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtsh2usi (%r9), %r12
# INTEL: vcvtsh2usi r12, word ptr [r9]
0x62,0x55,0xfe,0x08,0x79,0x21

# ATT:   vcvtsh2usi 254(%rcx), %r12
# INTEL: vcvtsh2usi r12, word ptr [rcx + 254]
0x62,0x75,0xfe,0x08,0x79,0x61,0x7f

# ATT:   vcvtsh2usi -256(%rdx), %r12
# INTEL: vcvtsh2usi r12, word ptr [rdx - 256]
0x62,0x75,0xfe,0x08,0x79,0x62,0x80

# ATT:   vcvtsi2sh %r12, %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, r12
0x62,0x45,0x96,0x00,0x2a,0xf4

# ATT:   vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, {rn-sae}, r12
0x62,0x45,0x96,0x10,0x2a,0xf4

# ATT:   vcvtsi2sh %edx, %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, edx
0x62,0x65,0x16,0x00,0x2a,0xf2

# ATT:   vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, {rn-sae}, edx
0x62,0x65,0x16,0x10,0x2a,0xf2

# ATT:   vcvtsi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x16,0x00,0x2a,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtsi2shl (%r9), %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [r9]
0x62,0x45,0x16,0x00,0x2a,0x31

# ATT:   vcvtsi2shl 508(%rcx), %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rcx + 508]
0x62,0x65,0x16,0x00,0x2a,0x71,0x7f

# ATT:   vcvtsi2shl -512(%rdx), %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rdx - 512]
0x62,0x65,0x16,0x00,0x2a,0x72,0x80

# ATT:   vcvtsi2shq 1016(%rcx), %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, qword ptr [rcx + 1016]
0x62,0x65,0x96,0x00,0x2a,0x71,0x7f

# ATT:   vcvtsi2shq -1024(%rdx), %xmm29, %xmm30
# INTEL: vcvtsi2sh xmm30, xmm29, qword ptr [rdx - 1024]
0x62,0x65,0x96,0x00,0x2a,0x72,0x80

# ATT:   vcvtss2sh %xmm28, %xmm29, %xmm30
# INTEL: vcvtss2sh xmm30, xmm29, xmm28
0x62,0x05,0x14,0x00,0x1d,0xf4

# ATT:   vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30
# INTEL: vcvtss2sh xmm30, xmm29, xmm28, {rn-sae}
0x62,0x05,0x14,0x10,0x1d,0xf4

# ATT:   vcvtss2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
# INTEL: vcvtss2sh xmm30 {k7}, xmm29, dword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x14,0x07,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvtss2sh (%r9), %xmm29, %xmm30
# INTEL: vcvtss2sh xmm30, xmm29, dword ptr [r9]
0x62,0x45,0x14,0x00,0x1d,0x31

# ATT:   vcvtss2sh 508(%rcx), %xmm29, %xmm30
# INTEL: vcvtss2sh xmm30, xmm29, dword ptr [rcx + 508]
0x62,0x65,0x14,0x00,0x1d,0x71,0x7f

# ATT:   vcvtss2sh -512(%rdx), %xmm29, %xmm30 {%k7} {z}
# INTEL: vcvtss2sh xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]
0x62,0x65,0x14,0x87,0x1d,0x72,0x80

# ATT:   vcvttph2dq %ymm29, %zmm30
# INTEL: vcvttph2dq zmm30, ymm29
0x62,0x05,0x7e,0x48,0x5b,0xf5

# ATT:   vcvttph2dq {sae}, %ymm29, %zmm30
# INTEL: vcvttph2dq zmm30, ymm29, {sae}
0x62,0x05,0x7e,0x18,0x5b,0xf5

# ATT:   vcvttph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
# INTEL: vcvttph2dq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
0x62,0x25,0x7e,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10

# ATT:   vcvttph2dq (%r9){1to16}, %zmm30
# INTEL: vcvttph2dq zmm30, word ptr [r9]{1to16}
|
||||
0x62,0x45,0x7e,0x58,0x5b,0x31
|
||||
|
||||
# ATT: vcvttph2dq 4064(%rcx), %zmm30
|
||||
# INTEL: vcvttph2dq zmm30, ymmword ptr [rcx + 4064]
|
||||
0x62,0x65,0x7e,0x48,0x5b,0x71,0x7f
|
||||
|
||||
# ATT: vcvttph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
|
||||
# INTEL: vcvttph2dq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
|
||||
0x62,0x65,0x7e,0xdf,0x5b,0x72,0x80
|
||||
|
||||
# ATT: vcvttph2qq %xmm29, %zmm30
|
||||
# INTEL: vcvttph2qq zmm30, xmm29
|
||||
0x62,0x05,0x7d,0x48,0x7a,0xf5
|
||||
|
||||
# ATT: vcvttph2qq {sae}, %xmm29, %zmm30
|
||||
# INTEL: vcvttph2qq zmm30, xmm29, {sae}
|
||||
0x62,0x05,0x7d,0x18,0x7a,0xf5
|
||||
|
||||
# ATT: vcvttph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
|
||||
# INTEL: vcvttph2qq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x7d,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttph2qq (%r9){1to8}, %zmm30
|
||||
# INTEL: vcvttph2qq zmm30, word ptr [r9]{1to8}
|
||||
0x62,0x45,0x7d,0x58,0x7a,0x31
|
||||
|
||||
# ATT: vcvttph2qq 2032(%rcx), %zmm30
|
||||
# INTEL: vcvttph2qq zmm30, xmmword ptr [rcx + 2032]
|
||||
0x62,0x65,0x7d,0x48,0x7a,0x71,0x7f
|
||||
|
||||
# ATT: vcvttph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
|
||||
# INTEL: vcvttph2qq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
|
||||
0x62,0x65,0x7d,0xdf,0x7a,0x72,0x80
|
||||
|
||||
# ATT: vcvttph2udq %ymm29, %zmm30
|
||||
# INTEL: vcvttph2udq zmm30, ymm29
|
||||
0x62,0x05,0x7c,0x48,0x78,0xf5
|
||||
|
||||
# ATT: vcvttph2udq {sae}, %ymm29, %zmm30
|
||||
# INTEL: vcvttph2udq zmm30, ymm29, {sae}
|
||||
0x62,0x05,0x7c,0x18,0x78,0xf5
|
||||
|
||||
# ATT: vcvttph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
|
||||
# INTEL: vcvttph2udq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x7c,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttph2udq (%r9){1to16}, %zmm30
|
||||
# INTEL: vcvttph2udq zmm30, word ptr [r9]{1to16}
|
||||
0x62,0x45,0x7c,0x58,0x78,0x31
|
||||
|
||||
# ATT: vcvttph2udq 4064(%rcx), %zmm30
|
||||
# INTEL: vcvttph2udq zmm30, ymmword ptr [rcx + 4064]
|
||||
0x62,0x65,0x7c,0x48,0x78,0x71,0x7f
|
||||
|
||||
# ATT: vcvttph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
|
||||
# INTEL: vcvttph2udq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
|
||||
0x62,0x65,0x7c,0xdf,0x78,0x72,0x80
|
||||
|
||||
# ATT: vcvttph2uqq %xmm29, %zmm30
|
||||
# INTEL: vcvttph2uqq zmm30, xmm29
|
||||
0x62,0x05,0x7d,0x48,0x78,0xf5
|
||||
|
||||
# ATT: vcvttph2uqq {sae}, %xmm29, %zmm30
|
||||
# INTEL: vcvttph2uqq zmm30, xmm29, {sae}
|
||||
0x62,0x05,0x7d,0x18,0x78,0xf5
|
||||
|
||||
# ATT: vcvttph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
|
||||
# INTEL: vcvttph2uqq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x7d,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttph2uqq (%r9){1to8}, %zmm30
|
||||
# INTEL: vcvttph2uqq zmm30, word ptr [r9]{1to8}
|
||||
0x62,0x45,0x7d,0x58,0x78,0x31
|
||||
|
||||
# ATT: vcvttph2uqq 2032(%rcx), %zmm30
|
||||
# INTEL: vcvttph2uqq zmm30, xmmword ptr [rcx + 2032]
|
||||
0x62,0x65,0x7d,0x48,0x78,0x71,0x7f
|
||||
|
||||
# ATT: vcvttph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
|
||||
# INTEL: vcvttph2uqq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
|
||||
0x62,0x65,0x7d,0xdf,0x78,0x72,0x80
|
||||
|
||||
# ATT: vcvttph2uw %zmm29, %zmm30
|
||||
# INTEL: vcvttph2uw zmm30, zmm29
|
||||
0x62,0x05,0x7c,0x48,0x7c,0xf5
|
||||
|
||||
# ATT: vcvttph2uw {sae}, %zmm29, %zmm30
|
||||
# INTEL: vcvttph2uw zmm30, zmm29, {sae}
|
||||
0x62,0x05,0x7c,0x18,0x7c,0xf5
|
||||
|
||||
# ATT: vcvttph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
|
||||
# INTEL: vcvttph2uw zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x7c,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttph2uw (%r9){1to32}, %zmm30
|
||||
# INTEL: vcvttph2uw zmm30, word ptr [r9]{1to32}
|
||||
0x62,0x45,0x7c,0x58,0x7c,0x31
|
||||
|
||||
# ATT: vcvttph2uw 8128(%rcx), %zmm30
|
||||
# INTEL: vcvttph2uw zmm30, zmmword ptr [rcx + 8128]
|
||||
0x62,0x65,0x7c,0x48,0x7c,0x71,0x7f
|
||||
|
||||
# ATT: vcvttph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
|
||||
# INTEL: vcvttph2uw zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x65,0x7c,0xdf,0x7c,0x72,0x80
|
||||
|
||||
# ATT: vcvttph2w %zmm29, %zmm30
|
||||
# INTEL: vcvttph2w zmm30, zmm29
|
||||
0x62,0x05,0x7d,0x48,0x7c,0xf5
|
||||
|
||||
# ATT: vcvttph2w {sae}, %zmm29, %zmm30
|
||||
# INTEL: vcvttph2w zmm30, zmm29, {sae}
|
||||
0x62,0x05,0x7d,0x18,0x7c,0xf5
|
||||
|
||||
# ATT: vcvttph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
|
||||
# INTEL: vcvttph2w zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x7d,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttph2w (%r9){1to32}, %zmm30
|
||||
# INTEL: vcvttph2w zmm30, word ptr [r9]{1to32}
|
||||
0x62,0x45,0x7d,0x58,0x7c,0x31
|
||||
|
||||
# ATT: vcvttph2w 8128(%rcx), %zmm30
|
||||
# INTEL: vcvttph2w zmm30, zmmword ptr [rcx + 8128]
|
||||
0x62,0x65,0x7d,0x48,0x7c,0x71,0x7f
|
||||
|
||||
# ATT: vcvttph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
|
||||
# INTEL: vcvttph2w zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x65,0x7d,0xdf,0x7c,0x72,0x80
|
||||
|
||||
# ATT: vcvttsh2si %xmm30, %edx
|
||||
# INTEL: vcvttsh2si edx, xmm30
|
||||
0x62,0x95,0x7e,0x08,0x2c,0xd6
|
||||
|
||||
# ATT: vcvttsh2si {sae}, %xmm30, %edx
|
||||
# INTEL: vcvttsh2si edx, xmm30, {sae}
|
||||
0x62,0x95,0x7e,0x18,0x2c,0xd6
|
||||
|
||||
# ATT: vcvttsh2si %xmm30, %r12
|
||||
# INTEL: vcvttsh2si r12, xmm30
|
||||
0x62,0x15,0xfe,0x08,0x2c,0xe6
|
||||
|
||||
# ATT: vcvttsh2si {sae}, %xmm30, %r12
|
||||
# INTEL: vcvttsh2si r12, xmm30, {sae}
|
||||
0x62,0x15,0xfe,0x18,0x2c,0xe6
|
||||
|
||||
# ATT: vcvttsh2si 268435456(%rbp,%r14,8), %edx
|
||||
# INTEL: vcvttsh2si edx, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0xb5,0x7e,0x08,0x2c,0x94,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttsh2si (%r9), %edx
|
||||
# INTEL: vcvttsh2si edx, word ptr [r9]
|
||||
0x62,0xd5,0x7e,0x08,0x2c,0x11
|
||||
|
||||
# ATT: vcvttsh2si 254(%rcx), %edx
|
||||
# INTEL: vcvttsh2si edx, word ptr [rcx + 254]
|
||||
0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f
|
||||
|
||||
# ATT: vcvttsh2si -256(%rdx), %edx
|
||||
# INTEL: vcvttsh2si edx, word ptr [rdx - 256]
|
||||
0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80
|
||||
|
||||
# ATT: vcvttsh2si 268435456(%rbp,%r14,8), %r12
|
||||
# INTEL: vcvttsh2si r12, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x35,0xfe,0x08,0x2c,0xa4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttsh2si (%r9), %r12
|
||||
# INTEL: vcvttsh2si r12, word ptr [r9]
|
||||
0x62,0x55,0xfe,0x08,0x2c,0x21
|
||||
|
||||
# ATT: vcvttsh2si 254(%rcx), %r12
|
||||
# INTEL: vcvttsh2si r12, word ptr [rcx + 254]
|
||||
0x62,0x75,0xfe,0x08,0x2c,0x61,0x7f
|
||||
|
||||
# ATT: vcvttsh2si -256(%rdx), %r12
|
||||
# INTEL: vcvttsh2si r12, word ptr [rdx - 256]
|
||||
0x62,0x75,0xfe,0x08,0x2c,0x62,0x80
|
||||
|
||||
# ATT: vcvttsh2usi %xmm30, %edx
|
||||
# INTEL: vcvttsh2usi edx, xmm30
|
||||
0x62,0x95,0x7e,0x08,0x78,0xd6
|
||||
|
||||
# ATT: vcvttsh2usi {sae}, %xmm30, %edx
|
||||
# INTEL: vcvttsh2usi edx, xmm30, {sae}
|
||||
0x62,0x95,0x7e,0x18,0x78,0xd6
|
||||
|
||||
# ATT: vcvttsh2usi %xmm30, %r12
|
||||
# INTEL: vcvttsh2usi r12, xmm30
|
||||
0x62,0x15,0xfe,0x08,0x78,0xe6
|
||||
|
||||
# ATT: vcvttsh2usi {sae}, %xmm30, %r12
|
||||
# INTEL: vcvttsh2usi r12, xmm30, {sae}
|
||||
0x62,0x15,0xfe,0x18,0x78,0xe6
|
||||
|
||||
# ATT: vcvttsh2usi 268435456(%rbp,%r14,8), %edx
|
||||
# INTEL: vcvttsh2usi edx, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0xb5,0x7e,0x08,0x78,0x94,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttsh2usi (%r9), %edx
|
||||
# INTEL: vcvttsh2usi edx, word ptr [r9]
|
||||
0x62,0xd5,0x7e,0x08,0x78,0x11
|
||||
|
||||
# ATT: vcvttsh2usi 254(%rcx), %edx
|
||||
# INTEL: vcvttsh2usi edx, word ptr [rcx + 254]
|
||||
0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f
|
||||
|
||||
# ATT: vcvttsh2usi -256(%rdx), %edx
|
||||
# INTEL: vcvttsh2usi edx, word ptr [rdx - 256]
|
||||
0x62,0xf5,0x7e,0x08,0x78,0x52,0x80
|
||||
|
||||
# ATT: vcvttsh2usi 268435456(%rbp,%r14,8), %r12
|
||||
# INTEL: vcvttsh2usi r12, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x35,0xfe,0x08,0x78,0xa4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvttsh2usi (%r9), %r12
|
||||
# INTEL: vcvttsh2usi r12, word ptr [r9]
|
||||
0x62,0x55,0xfe,0x08,0x78,0x21
|
||||
|
||||
# ATT: vcvttsh2usi 254(%rcx), %r12
|
||||
# INTEL: vcvttsh2usi r12, word ptr [rcx + 254]
|
||||
0x62,0x75,0xfe,0x08,0x78,0x61,0x7f
|
||||
|
||||
# ATT: vcvttsh2usi -256(%rdx), %r12
|
||||
# INTEL: vcvttsh2usi r12, word ptr [rdx - 256]
|
||||
0x62,0x75,0xfe,0x08,0x78,0x62,0x80
|
||||
|
||||
# ATT: vcvtudq2ph %zmm29, %ymm30
|
||||
# INTEL: vcvtudq2ph ymm30, zmm29
|
||||
0x62,0x05,0x7f,0x48,0x7a,0xf5
|
||||
|
||||
# ATT: vcvtudq2ph {rn-sae}, %zmm29, %ymm30
|
||||
# INTEL: vcvtudq2ph ymm30, zmm29, {rn-sae}
|
||||
0x62,0x05,0x7f,0x18,0x7a,0xf5
|
||||
|
||||
# ATT: vcvtudq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
|
||||
# INTEL: vcvtudq2ph ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x7f,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvtudq2ph (%r9){1to16}, %ymm30
|
||||
# INTEL: vcvtudq2ph ymm30, dword ptr [r9]{1to16}
|
||||
0x62,0x45,0x7f,0x58,0x7a,0x31
|
||||
|
||||
# ATT: vcvtudq2ph 8128(%rcx), %ymm30
|
||||
# INTEL: vcvtudq2ph ymm30, zmmword ptr [rcx + 8128]
|
||||
0x62,0x65,0x7f,0x48,0x7a,0x71,0x7f
|
||||
|
||||
# ATT: vcvtudq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
|
||||
# INTEL: vcvtudq2ph ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
|
||||
0x62,0x65,0x7f,0xdf,0x7a,0x72,0x80
|
||||
|
||||
# ATT: vcvtuqq2ph %zmm29, %xmm30
|
||||
# INTEL: vcvtuqq2ph xmm30, zmm29
|
||||
0x62,0x05,0xff,0x48,0x7a,0xf5
|
||||
|
||||
# ATT: vcvtuqq2ph {rn-sae}, %zmm29, %xmm30
|
||||
# INTEL: vcvtuqq2ph xmm30, zmm29, {rn-sae}
|
||||
0x62,0x05,0xff,0x18,0x7a,0xf5
|
||||
|
||||
# ATT: vcvtuqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
|
||||
# INTEL: vcvtuqq2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0xff,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvtuqq2ph (%r9){1to8}, %xmm30
|
||||
# INTEL: vcvtuqq2ph xmm30, qword ptr [r9]{1to8}
|
||||
0x62,0x45,0xff,0x58,0x7a,0x31
|
||||
|
||||
# ATT: vcvtuqq2phz 8128(%rcx), %xmm30
|
||||
# INTEL: vcvtuqq2ph xmm30, zmmword ptr [rcx + 8128]
|
||||
0x62,0x65,0xff,0x48,0x7a,0x71,0x7f
|
||||
|
||||
# ATT: vcvtuqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
|
||||
# INTEL: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
|
||||
0x62,0x65,0xff,0xdf,0x7a,0x72,0x80
|
||||
|
||||
# ATT: vcvtusi2sh %r12, %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, r12
|
||||
0x62,0x45,0x96,0x00,0x7b,0xf4
|
||||
|
||||
# ATT: vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, {rn-sae}, r12
|
||||
0x62,0x45,0x96,0x10,0x7b,0xf4
|
||||
|
||||
# ATT: vcvtusi2sh %edx, %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, edx
|
||||
0x62,0x65,0x16,0x00,0x7b,0xf2
|
||||
|
||||
# ATT: vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, {rn-sae}, edx
|
||||
0x62,0x65,0x16,0x10,0x7b,0xf2
|
||||
|
||||
# ATT: vcvtusi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x16,0x00,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvtusi2shl (%r9), %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [r9]
|
||||
0x62,0x45,0x16,0x00,0x7b,0x31
|
||||
|
||||
# ATT: vcvtusi2shl 508(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rcx + 508]
|
||||
0x62,0x65,0x16,0x00,0x7b,0x71,0x7f
|
||||
|
||||
# ATT: vcvtusi2shl -512(%rdx), %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rdx - 512]
|
||||
0x62,0x65,0x16,0x00,0x7b,0x72,0x80
|
||||
|
||||
# ATT: vcvtusi2shq 1016(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, qword ptr [rcx + 1016]
|
||||
0x62,0x65,0x96,0x00,0x7b,0x71,0x7f
|
||||
|
||||
# ATT: vcvtusi2shq -1024(%rdx), %xmm29, %xmm30
|
||||
# INTEL: vcvtusi2sh xmm30, xmm29, qword ptr [rdx - 1024]
|
||||
0x62,0x65,0x96,0x00,0x7b,0x72,0x80
|
||||
|
||||
# ATT: vcvtuw2ph %zmm29, %zmm30
|
||||
# INTEL: vcvtuw2ph zmm30, zmm29
|
||||
0x62,0x05,0x7f,0x48,0x7d,0xf5
|
||||
|
||||
# ATT: vcvtuw2ph {rn-sae}, %zmm29, %zmm30
|
||||
# INTEL: vcvtuw2ph zmm30, zmm29, {rn-sae}
|
||||
0x62,0x05,0x7f,0x18,0x7d,0xf5
|
||||
|
||||
# ATT: vcvtuw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
|
||||
# INTEL: vcvtuw2ph zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x7f,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvtuw2ph (%r9){1to32}, %zmm30
|
||||
# INTEL: vcvtuw2ph zmm30, word ptr [r9]{1to32}
|
||||
0x62,0x45,0x7f,0x58,0x7d,0x31
|
||||
|
||||
# ATT: vcvtuw2ph 8128(%rcx), %zmm30
|
||||
# INTEL: vcvtuw2ph zmm30, zmmword ptr [rcx + 8128]
|
||||
0x62,0x65,0x7f,0x48,0x7d,0x71,0x7f
|
||||
|
||||
# ATT: vcvtuw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
|
||||
# INTEL: vcvtuw2ph zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x65,0x7f,0xdf,0x7d,0x72,0x80
|
||||
|
||||
# ATT: vcvtw2ph %zmm29, %zmm30
|
||||
# INTEL: vcvtw2ph zmm30, zmm29
|
||||
0x62,0x05,0x7e,0x48,0x7d,0xf5
|
||||
|
||||
# ATT: vcvtw2ph {rn-sae}, %zmm29, %zmm30
|
||||
# INTEL: vcvtw2ph zmm30, zmm29, {rn-sae}
|
||||
0x62,0x05,0x7e,0x18,0x7d,0xf5
|
||||
|
||||
# ATT: vcvtw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
|
||||
# INTEL: vcvtw2ph zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x25,0x7e,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vcvtw2ph (%r9){1to32}, %zmm30
|
||||
# INTEL: vcvtw2ph zmm30, word ptr [r9]{1to32}
|
||||
0x62,0x45,0x7e,0x58,0x7d,0x31
|
||||
|
||||
# ATT: vcvtw2ph 8128(%rcx), %zmm30
|
||||
# INTEL: vcvtw2ph zmm30, zmmword ptr [rcx + 8128]
|
||||
0x62,0x65,0x7e,0x48,0x7d,0x71,0x7f
|
||||
|
||||
# ATT: vcvtw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
|
||||
# INTEL: vcvtw2ph zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x65,0x7e,0xdf,0x7d,0x72,0x80
|
||||
|
|
|
@@ -280,3 +280,859 @@
# ATT: vsubph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
# INTEL: vsubph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
0x62,0xf5,0x54,0x9f,0x5c,0x72,0x80

# ATT: vcvtdq2ph %xmm5, %xmm6
# INTEL: vcvtdq2ph xmm6, xmm5
0x62,0xf5,0x7c,0x08,0x5b,0xf5

# ATT: vcvtdq2ph %ymm5, %xmm6
# INTEL: vcvtdq2ph xmm6, ymm5
0x62,0xf5,0x7c,0x28,0x5b,0xf5

# ATT: vcvtdq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtdq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtdq2ph (%ecx){1to4}, %xmm6
# INTEL: vcvtdq2ph xmm6, dword ptr [ecx]{1to4}
0x62,0xf5,0x7c,0x18,0x5b,0x31

# ATT: vcvtdq2phx 2032(%ecx), %xmm6
# INTEL: vcvtdq2ph xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7c,0x08,0x5b,0x71,0x7f

# ATT: vcvtdq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtdq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
0x62,0xf5,0x7c,0x9f,0x5b,0x72,0x80

# ATT: vcvtdq2ph (%ecx){1to8}, %xmm6
# INTEL: vcvtdq2ph xmm6, dword ptr [ecx]{1to8}
0x62,0xf5,0x7c,0x38,0x5b,0x31

# ATT: vcvtdq2phy 4064(%ecx), %xmm6
# INTEL: vcvtdq2ph xmm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7c,0x28,0x5b,0x71,0x7f

# ATT: vcvtdq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvtdq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
0x62,0xf5,0x7c,0xbf,0x5b,0x72,0x80

# ATT: vcvtpd2ph %xmm5, %xmm6
# INTEL: vcvtpd2ph xmm6, xmm5
0x62,0xf5,0xfd,0x08,0x5a,0xf5

# ATT: vcvtpd2ph %ymm5, %xmm6
# INTEL: vcvtpd2ph xmm6, ymm5
0x62,0xf5,0xfd,0x28,0x5a,0xf5

# ATT: vcvtpd2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtpd2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0xfd,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtpd2ph (%ecx){1to2}, %xmm6
# INTEL: vcvtpd2ph xmm6, qword ptr [ecx]{1to2}
0x62,0xf5,0xfd,0x18,0x5a,0x31

# ATT: vcvtpd2phx 2032(%ecx), %xmm6
# INTEL: vcvtpd2ph xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0xfd,0x08,0x5a,0x71,0x7f

# ATT: vcvtpd2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
# INTEL: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
0x62,0xf5,0xfd,0x9f,0x5a,0x72,0x80

# ATT: vcvtpd2ph (%ecx){1to4}, %xmm6
# INTEL: vcvtpd2ph xmm6, qword ptr [ecx]{1to4}
0x62,0xf5,0xfd,0x38,0x5a,0x31

# ATT: vcvtpd2phy 4064(%ecx), %xmm6
# INTEL: vcvtpd2ph xmm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0xfd,0x28,0x5a,0x71,0x7f

# ATT: vcvtpd2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
0x62,0xf5,0xfd,0xbf,0x5a,0x72,0x80

# ATT: vcvtph2dq %xmm5, %xmm6
# INTEL: vcvtph2dq xmm6, xmm5
0x62,0xf5,0x7d,0x08,0x5b,0xf5

# ATT: vcvtph2dq %xmm5, %ymm6
# INTEL: vcvtph2dq ymm6, xmm5
0x62,0xf5,0x7d,0x28,0x5b,0xf5

# ATT: vcvtph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtph2dq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2dq (%ecx){1to4}, %xmm6
# INTEL: vcvtph2dq xmm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7d,0x18,0x5b,0x31

# ATT: vcvtph2dq 1016(%ecx), %xmm6
# INTEL: vcvtph2dq xmm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7d,0x08,0x5b,0x71,0x7f

# ATT: vcvtph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtph2dq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7d,0x9f,0x5b,0x72,0x80

# ATT: vcvtph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtph2dq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2dq (%ecx){1to8}, %ymm6
# INTEL: vcvtph2dq ymm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7d,0x38,0x5b,0x31

# ATT: vcvtph2dq 2032(%ecx), %ymm6
# INTEL: vcvtph2dq ymm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7d,0x28,0x5b,0x71,0x7f

# ATT: vcvtph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
# INTEL: vcvtph2dq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7d,0xbf,0x5b,0x72,0x80

# ATT: vcvtph2pd %xmm5, %xmm6
# INTEL: vcvtph2pd xmm6, xmm5
0x62,0xf5,0x7c,0x08,0x5a,0xf5

# ATT: vcvtph2pd %xmm5, %ymm6
# INTEL: vcvtph2pd ymm6, xmm5
0x62,0xf5,0x7c,0x28,0x5a,0xf5

# ATT: vcvtph2pd 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtph2pd xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2pd (%ecx){1to2}, %xmm6
# INTEL: vcvtph2pd xmm6, word ptr [ecx]{1to2}
0x62,0xf5,0x7c,0x18,0x5a,0x31

# ATT: vcvtph2pd 508(%ecx), %xmm6
# INTEL: vcvtph2pd xmm6, dword ptr [ecx + 508]
0x62,0xf5,0x7c,0x08,0x5a,0x71,0x7f

# ATT: vcvtph2pd -256(%edx){1to2}, %xmm6 {%k7} {z}
# INTEL: vcvtph2pd xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
0x62,0xf5,0x7c,0x9f,0x5a,0x72,0x80

# ATT: vcvtph2pd 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtph2pd ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x2f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2pd (%ecx){1to4}, %ymm6
# INTEL: vcvtph2pd ymm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7c,0x38,0x5a,0x31

# ATT: vcvtph2pd 1016(%ecx), %ymm6
# INTEL: vcvtph2pd ymm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7c,0x28,0x5a,0x71,0x7f

# ATT: vcvtph2pd -256(%edx){1to4}, %ymm6 {%k7} {z}
# INTEL: vcvtph2pd ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7c,0xbf,0x5a,0x72,0x80

# ATT: vcvtph2psx %xmm5, %xmm6
# INTEL: vcvtph2psx xmm6, xmm5
0x62,0xf6,0x7d,0x08,0x13,0xf5

# ATT: vcvtph2psx %xmm5, %ymm6
# INTEL: vcvtph2psx ymm6, xmm5
0x62,0xf6,0x7d,0x28,0x13,0xf5

# ATT: vcvtph2psx 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtph2psx xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf6,0x7d,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2psx (%ecx){1to4}, %xmm6
# INTEL: vcvtph2psx xmm6, word ptr [ecx]{1to4}
0x62,0xf6,0x7d,0x18,0x13,0x31

# ATT: vcvtph2psx 1016(%ecx), %xmm6
# INTEL: vcvtph2psx xmm6, qword ptr [ecx + 1016]
0x62,0xf6,0x7d,0x08,0x13,0x71,0x7f

# ATT: vcvtph2psx -256(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtph2psx xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf6,0x7d,0x9f,0x13,0x72,0x80

# ATT: vcvtph2psx 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtph2psx ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf6,0x7d,0x2f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2psx (%ecx){1to8}, %ymm6
# INTEL: vcvtph2psx ymm6, word ptr [ecx]{1to8}
0x62,0xf6,0x7d,0x38,0x13,0x31

# ATT: vcvtph2psx 2032(%ecx), %ymm6
# INTEL: vcvtph2psx ymm6, xmmword ptr [ecx + 2032]
0x62,0xf6,0x7d,0x28,0x13,0x71,0x7f

# ATT: vcvtph2psx -256(%edx){1to8}, %ymm6 {%k7} {z}
# INTEL: vcvtph2psx ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf6,0x7d,0xbf,0x13,0x72,0x80

# ATT: vcvtph2qq %xmm5, %xmm6
# INTEL: vcvtph2qq xmm6, xmm5
0x62,0xf5,0x7d,0x08,0x7b,0xf5

# ATT: vcvtph2qq %xmm5, %ymm6
# INTEL: vcvtph2qq ymm6, xmm5
0x62,0xf5,0x7d,0x28,0x7b,0xf5

# ATT: vcvtph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtph2qq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x0f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2qq (%ecx){1to2}, %xmm6
# INTEL: vcvtph2qq xmm6, word ptr [ecx]{1to2}
0x62,0xf5,0x7d,0x18,0x7b,0x31

# ATT: vcvtph2qq 508(%ecx), %xmm6
# INTEL: vcvtph2qq xmm6, dword ptr [ecx + 508]
0x62,0xf5,0x7d,0x08,0x7b,0x71,0x7f

# ATT: vcvtph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
# INTEL: vcvtph2qq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
0x62,0xf5,0x7d,0x9f,0x7b,0x72,0x80

# ATT: vcvtph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtph2qq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x2f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2qq (%ecx){1to4}, %ymm6
# INTEL: vcvtph2qq ymm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7d,0x38,0x7b,0x31

# ATT: vcvtph2qq 1016(%ecx), %ymm6
# INTEL: vcvtph2qq ymm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7d,0x28,0x7b,0x71,0x7f

# ATT: vcvtph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
# INTEL: vcvtph2qq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7d,0xbf,0x7b,0x72,0x80

# ATT: vcvtph2udq %xmm5, %xmm6
# INTEL: vcvtph2udq xmm6, xmm5
0x62,0xf5,0x7c,0x08,0x79,0xf5

# ATT: vcvtph2udq %xmm5, %ymm6
# INTEL: vcvtph2udq ymm6, xmm5
0x62,0xf5,0x7c,0x28,0x79,0xf5

# ATT: vcvtph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtph2udq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2udq (%ecx){1to4}, %xmm6
# INTEL: vcvtph2udq xmm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7c,0x18,0x79,0x31

# ATT: vcvtph2udq 1016(%ecx), %xmm6
# INTEL: vcvtph2udq xmm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7c,0x08,0x79,0x71,0x7f

# ATT: vcvtph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtph2udq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7c,0x9f,0x79,0x72,0x80

# ATT: vcvtph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtph2udq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2udq (%ecx){1to8}, %ymm6
# INTEL: vcvtph2udq ymm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7c,0x38,0x79,0x31

# ATT: vcvtph2udq 2032(%ecx), %ymm6
# INTEL: vcvtph2udq ymm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7c,0x28,0x79,0x71,0x7f

# ATT: vcvtph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
# INTEL: vcvtph2udq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7c,0xbf,0x79,0x72,0x80

# ATT: vcvtph2uqq %xmm5, %xmm6
# INTEL: vcvtph2uqq xmm6, xmm5
0x62,0xf5,0x7d,0x08,0x79,0xf5

# ATT: vcvtph2uqq %xmm5, %ymm6
# INTEL: vcvtph2uqq ymm6, xmm5
0x62,0xf5,0x7d,0x28,0x79,0xf5

# ATT: vcvtph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtph2uqq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2uqq (%ecx){1to2}, %xmm6
# INTEL: vcvtph2uqq xmm6, word ptr [ecx]{1to2}
0x62,0xf5,0x7d,0x18,0x79,0x31

# ATT: vcvtph2uqq 508(%ecx), %xmm6
# INTEL: vcvtph2uqq xmm6, dword ptr [ecx + 508]
0x62,0xf5,0x7d,0x08,0x79,0x71,0x7f

# ATT: vcvtph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
# INTEL: vcvtph2uqq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
0x62,0xf5,0x7d,0x9f,0x79,0x72,0x80

# ATT: vcvtph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtph2uqq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2uqq (%ecx){1to4}, %ymm6
# INTEL: vcvtph2uqq ymm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7d,0x38,0x79,0x31

# ATT: vcvtph2uqq 1016(%ecx), %ymm6
# INTEL: vcvtph2uqq ymm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7d,0x28,0x79,0x71,0x7f

# ATT: vcvtph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
# INTEL: vcvtph2uqq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7d,0xbf,0x79,0x72,0x80

# ATT: vcvtph2uw %xmm5, %xmm6
# INTEL: vcvtph2uw xmm6, xmm5
0x62,0xf5,0x7c,0x08,0x7d,0xf5

# ATT: vcvtph2uw %ymm5, %ymm6
# INTEL: vcvtph2uw ymm6, ymm5
0x62,0xf5,0x7c,0x28,0x7d,0xf5

# ATT: vcvtph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtph2uw xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2uw (%ecx){1to8}, %xmm6
# INTEL: vcvtph2uw xmm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7c,0x18,0x7d,0x31

# ATT: vcvtph2uw 2032(%ecx), %xmm6
# INTEL: vcvtph2uw xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7c,0x08,0x7d,0x71,0x7f

# ATT: vcvtph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvtph2uw xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7c,0x9f,0x7d,0x72,0x80

# ATT: vcvtph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtph2uw ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2uw (%ecx){1to16}, %ymm6
# INTEL: vcvtph2uw ymm6, word ptr [ecx]{1to16}
0x62,0xf5,0x7c,0x38,0x7d,0x31

# ATT: vcvtph2uw 4064(%ecx), %ymm6
# INTEL: vcvtph2uw ymm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7c,0x28,0x7d,0x71,0x7f

# ATT: vcvtph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
# INTEL: vcvtph2uw ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
0x62,0xf5,0x7c,0xbf,0x7d,0x72,0x80

# ATT: vcvtph2w %xmm5, %xmm6
# INTEL: vcvtph2w xmm6, xmm5
0x62,0xf5,0x7d,0x08,0x7d,0xf5

# ATT: vcvtph2w %ymm5, %ymm6
# INTEL: vcvtph2w ymm6, ymm5
0x62,0xf5,0x7d,0x28,0x7d,0xf5

# ATT: vcvtph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtph2w xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2w (%ecx){1to8}, %xmm6
# INTEL: vcvtph2w xmm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7d,0x18,0x7d,0x31

# ATT: vcvtph2w 2032(%ecx), %xmm6
# INTEL: vcvtph2w xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7d,0x08,0x7d,0x71,0x7f

# ATT: vcvtph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvtph2w xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7d,0x9f,0x7d,0x72,0x80

# ATT: vcvtph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtph2w ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtph2w (%ecx){1to16}, %ymm6
# INTEL: vcvtph2w ymm6, word ptr [ecx]{1to16}
0x62,0xf5,0x7d,0x38,0x7d,0x31

# ATT: vcvtph2w 4064(%ecx), %ymm6
# INTEL: vcvtph2w ymm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7d,0x28,0x7d,0x71,0x7f

# ATT: vcvtph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
# INTEL: vcvtph2w ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
0x62,0xf5,0x7d,0xbf,0x7d,0x72,0x80

# ATT: vcvtps2phx %xmm5, %xmm6
# INTEL: vcvtps2phx xmm6, xmm5
0x62,0xf5,0x7d,0x08,0x1d,0xf5

# ATT: vcvtps2phx %ymm5, %xmm6
# INTEL: vcvtps2phx xmm6, ymm5
0x62,0xf5,0x7d,0x28,0x1d,0xf5

# ATT: vcvtps2phxx 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtps2phx xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtps2phx (%ecx){1to4}, %xmm6
# INTEL: vcvtps2phx xmm6, dword ptr [ecx]{1to4}
0x62,0xf5,0x7d,0x18,0x1d,0x31

# ATT: vcvtps2phxx 2032(%ecx), %xmm6
# INTEL: vcvtps2phx xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7d,0x08,0x1d,0x71,0x7f

# ATT: vcvtps2phx -512(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtps2phx xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
0x62,0xf5,0x7d,0x9f,0x1d,0x72,0x80

# ATT: vcvtps2phx (%ecx){1to8}, %xmm6
# INTEL: vcvtps2phx xmm6, dword ptr [ecx]{1to8}
0x62,0xf5,0x7d,0x38,0x1d,0x31

# ATT: vcvtps2phxy 4064(%ecx), %xmm6
# INTEL: vcvtps2phx xmm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7d,0x28,0x1d,0x71,0x7f

# ATT: vcvtps2phx -512(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvtps2phx xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
0x62,0xf5,0x7d,0xbf,0x1d,0x72,0x80

# ATT: vcvtqq2ph %xmm5, %xmm6
# INTEL: vcvtqq2ph xmm6, xmm5
0x62,0xf5,0xfc,0x08,0x5b,0xf5

# ATT: vcvtqq2ph %ymm5, %xmm6
# INTEL: vcvtqq2ph xmm6, ymm5
0x62,0xf5,0xfc,0x28,0x5b,0xf5

# ATT: vcvtqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtqq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0xfc,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtqq2ph (%ecx){1to2}, %xmm6
# INTEL: vcvtqq2ph xmm6, qword ptr [ecx]{1to2}
0x62,0xf5,0xfc,0x18,0x5b,0x31

# ATT: vcvtqq2phx 2032(%ecx), %xmm6
# INTEL: vcvtqq2ph xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0xfc,0x08,0x5b,0x71,0x7f

# ATT: vcvtqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
# INTEL: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
0x62,0xf5,0xfc,0x9f,0x5b,0x72,0x80

# ATT: vcvtqq2ph (%ecx){1to4}, %xmm6
# INTEL: vcvtqq2ph xmm6, qword ptr [ecx]{1to4}
0x62,0xf5,0xfc,0x38,0x5b,0x31

# ATT: vcvtqq2phy 4064(%ecx), %xmm6
# INTEL: vcvtqq2ph xmm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0xfc,0x28,0x5b,0x71,0x7f

# ATT: vcvtqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
0x62,0xf5,0xfc,0xbf,0x5b,0x72,0x80

# ATT: vcvttph2dq %xmm5, %xmm6
# INTEL: vcvttph2dq xmm6, xmm5
0x62,0xf5,0x7e,0x08,0x5b,0xf5

# ATT: vcvttph2dq %xmm5, %ymm6
# INTEL: vcvttph2dq ymm6, xmm5
0x62,0xf5,0x7e,0x28,0x5b,0xf5

# ATT: vcvttph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvttph2dq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7e,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2dq (%ecx){1to4}, %xmm6
# INTEL: vcvttph2dq xmm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7e,0x18,0x5b,0x31

# ATT: vcvttph2dq 1016(%ecx), %xmm6
# INTEL: vcvttph2dq xmm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7e,0x08,0x5b,0x71,0x7f

# ATT: vcvttph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvttph2dq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7e,0x9f,0x5b,0x72,0x80

# ATT: vcvttph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvttph2dq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7e,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2dq (%ecx){1to8}, %ymm6
# INTEL: vcvttph2dq ymm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7e,0x38,0x5b,0x31

# ATT: vcvttph2dq 2032(%ecx), %ymm6
# INTEL: vcvttph2dq ymm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7e,0x28,0x5b,0x71,0x7f

# ATT: vcvttph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
# INTEL: vcvttph2dq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7e,0xbf,0x5b,0x72,0x80

# ATT: vcvttph2qq %xmm5, %xmm6
# INTEL: vcvttph2qq xmm6, xmm5
0x62,0xf5,0x7d,0x08,0x7a,0xf5

# ATT: vcvttph2qq %xmm5, %ymm6
# INTEL: vcvttph2qq ymm6, xmm5
0x62,0xf5,0x7d,0x28,0x7a,0xf5

# ATT: vcvttph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvttph2qq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2qq (%ecx){1to2}, %xmm6
# INTEL: vcvttph2qq xmm6, word ptr [ecx]{1to2}
0x62,0xf5,0x7d,0x18,0x7a,0x31

# ATT: vcvttph2qq 508(%ecx), %xmm6
# INTEL: vcvttph2qq xmm6, dword ptr [ecx + 508]
0x62,0xf5,0x7d,0x08,0x7a,0x71,0x7f

# ATT: vcvttph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
# INTEL: vcvttph2qq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
0x62,0xf5,0x7d,0x9f,0x7a,0x72,0x80

# ATT: vcvttph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvttph2qq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x2f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2qq (%ecx){1to4}, %ymm6
# INTEL: vcvttph2qq ymm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7d,0x38,0x7a,0x31

# ATT: vcvttph2qq 1016(%ecx), %ymm6
# INTEL: vcvttph2qq ymm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7d,0x28,0x7a,0x71,0x7f

# ATT: vcvttph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
# INTEL: vcvttph2qq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7d,0xbf,0x7a,0x72,0x80

# ATT: vcvttph2udq %xmm5, %xmm6
# INTEL: vcvttph2udq xmm6, xmm5
0x62,0xf5,0x7c,0x08,0x78,0xf5

# ATT: vcvttph2udq %xmm5, %ymm6
# INTEL: vcvttph2udq ymm6, xmm5
0x62,0xf5,0x7c,0x28,0x78,0xf5

# ATT: vcvttph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvttph2udq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2udq (%ecx){1to4}, %xmm6
# INTEL: vcvttph2udq xmm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7c,0x18,0x78,0x31

# ATT: vcvttph2udq 1016(%ecx), %xmm6
# INTEL: vcvttph2udq xmm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7c,0x08,0x78,0x71,0x7f

# ATT: vcvttph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvttph2udq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7c,0x9f,0x78,0x72,0x80

# ATT: vcvttph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvttph2udq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2udq (%ecx){1to8}, %ymm6
# INTEL: vcvttph2udq ymm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7c,0x38,0x78,0x31

# ATT: vcvttph2udq 2032(%ecx), %ymm6
# INTEL: vcvttph2udq ymm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7c,0x28,0x78,0x71,0x7f

# ATT: vcvttph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
# INTEL: vcvttph2udq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7c,0xbf,0x78,0x72,0x80

# ATT: vcvttph2uqq %xmm5, %xmm6
# INTEL: vcvttph2uqq xmm6, xmm5
0x62,0xf5,0x7d,0x08,0x78,0xf5

# ATT: vcvttph2uqq %xmm5, %ymm6
# INTEL: vcvttph2uqq ymm6, xmm5
0x62,0xf5,0x7d,0x28,0x78,0xf5

# ATT: vcvttph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvttph2uqq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2uqq (%ecx){1to2}, %xmm6
# INTEL: vcvttph2uqq xmm6, word ptr [ecx]{1to2}
0x62,0xf5,0x7d,0x18,0x78,0x31

# ATT: vcvttph2uqq 508(%ecx), %xmm6
# INTEL: vcvttph2uqq xmm6, dword ptr [ecx + 508]
0x62,0xf5,0x7d,0x08,0x78,0x71,0x7f

# ATT: vcvttph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
# INTEL: vcvttph2uqq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
0x62,0xf5,0x7d,0x9f,0x78,0x72,0x80

# ATT: vcvttph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvttph2uqq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2uqq (%ecx){1to4}, %ymm6
# INTEL: vcvttph2uqq ymm6, word ptr [ecx]{1to4}
0x62,0xf5,0x7d,0x38,0x78,0x31

# ATT: vcvttph2uqq 1016(%ecx), %ymm6
# INTEL: vcvttph2uqq ymm6, qword ptr [ecx + 1016]
0x62,0xf5,0x7d,0x28,0x78,0x71,0x7f

# ATT: vcvttph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
# INTEL: vcvttph2uqq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
0x62,0xf5,0x7d,0xbf,0x78,0x72,0x80

# ATT: vcvttph2uw %xmm5, %xmm6
# INTEL: vcvttph2uw xmm6, xmm5
0x62,0xf5,0x7c,0x08,0x7c,0xf5

# ATT: vcvttph2uw %ymm5, %ymm6
# INTEL: vcvttph2uw ymm6, ymm5
0x62,0xf5,0x7c,0x28,0x7c,0xf5

# ATT: vcvttph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvttph2uw xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2uw (%ecx){1to8}, %xmm6
# INTEL: vcvttph2uw xmm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7c,0x18,0x7c,0x31

# ATT: vcvttph2uw 2032(%ecx), %xmm6
# INTEL: vcvttph2uw xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7c,0x08,0x7c,0x71,0x7f

# ATT: vcvttph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvttph2uw xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7c,0x9f,0x7c,0x72,0x80

# ATT: vcvttph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvttph2uw ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7c,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2uw (%ecx){1to16}, %ymm6
# INTEL: vcvttph2uw ymm6, word ptr [ecx]{1to16}
0x62,0xf5,0x7c,0x38,0x7c,0x31

# ATT: vcvttph2uw 4064(%ecx), %ymm6
# INTEL: vcvttph2uw ymm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7c,0x28,0x7c,0x71,0x7f

# ATT: vcvttph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
# INTEL: vcvttph2uw ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
0x62,0xf5,0x7c,0xbf,0x7c,0x72,0x80

# ATT: vcvttph2w %xmm5, %xmm6
# INTEL: vcvttph2w xmm6, xmm5
0x62,0xf5,0x7d,0x08,0x7c,0xf5

# ATT: vcvttph2w %ymm5, %ymm6
# INTEL: vcvttph2w ymm6, ymm5
0x62,0xf5,0x7d,0x28,0x7c,0xf5

# ATT: vcvttph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvttph2w xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2w (%ecx){1to8}, %xmm6
# INTEL: vcvttph2w xmm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7d,0x18,0x7c,0x31

# ATT: vcvttph2w 2032(%ecx), %xmm6
# INTEL: vcvttph2w xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7d,0x08,0x7c,0x71,0x7f

# ATT: vcvttph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvttph2w xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7d,0x9f,0x7c,0x72,0x80

# ATT: vcvttph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvttph2w ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7d,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvttph2w (%ecx){1to16}, %ymm6
# INTEL: vcvttph2w ymm6, word ptr [ecx]{1to16}
0x62,0xf5,0x7d,0x38,0x7c,0x31

# ATT: vcvttph2w 4064(%ecx), %ymm6
# INTEL: vcvttph2w ymm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7d,0x28,0x7c,0x71,0x7f

# ATT: vcvttph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
# INTEL: vcvttph2w ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
0x62,0xf5,0x7d,0xbf,0x7c,0x72,0x80

# ATT: vcvtudq2ph %xmm5, %xmm6
# INTEL: vcvtudq2ph xmm6, xmm5
0x62,0xf5,0x7f,0x08,0x7a,0xf5

# ATT: vcvtudq2ph %ymm5, %xmm6
# INTEL: vcvtudq2ph xmm6, ymm5
0x62,0xf5,0x7f,0x28,0x7a,0xf5

# ATT: vcvtudq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtudq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7f,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtudq2ph (%ecx){1to4}, %xmm6
# INTEL: vcvtudq2ph xmm6, dword ptr [ecx]{1to4}
0x62,0xf5,0x7f,0x18,0x7a,0x31

# ATT: vcvtudq2phx 2032(%ecx), %xmm6
# INTEL: vcvtudq2ph xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7f,0x08,0x7a,0x71,0x7f

# ATT: vcvtudq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtudq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
0x62,0xf5,0x7f,0x9f,0x7a,0x72,0x80

# ATT: vcvtudq2ph (%ecx){1to8}, %xmm6
# INTEL: vcvtudq2ph xmm6, dword ptr [ecx]{1to8}
0x62,0xf5,0x7f,0x38,0x7a,0x31

# ATT: vcvtudq2phy 4064(%ecx), %xmm6
# INTEL: vcvtudq2ph xmm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7f,0x28,0x7a,0x71,0x7f

# ATT: vcvtudq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvtudq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
0x62,0xf5,0x7f,0xbf,0x7a,0x72,0x80

# ATT: vcvtuqq2ph %xmm5, %xmm6
# INTEL: vcvtuqq2ph xmm6, xmm5
0x62,0xf5,0xff,0x08,0x7a,0xf5

# ATT: vcvtuqq2ph %ymm5, %xmm6
# INTEL: vcvtuqq2ph xmm6, ymm5
0x62,0xf5,0xff,0x28,0x7a,0xf5

# ATT: vcvtuqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtuqq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0xff,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtuqq2ph (%ecx){1to2}, %xmm6
# INTEL: vcvtuqq2ph xmm6, qword ptr [ecx]{1to2}
0x62,0xf5,0xff,0x18,0x7a,0x31

# ATT: vcvtuqq2phx 2032(%ecx), %xmm6
# INTEL: vcvtuqq2ph xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0xff,0x08,0x7a,0x71,0x7f

# ATT: vcvtuqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
# INTEL: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
0x62,0xf5,0xff,0x9f,0x7a,0x72,0x80

# ATT: vcvtuqq2ph (%ecx){1to4}, %xmm6
# INTEL: vcvtuqq2ph xmm6, qword ptr [ecx]{1to4}
0x62,0xf5,0xff,0x38,0x7a,0x31

# ATT: vcvtuqq2phy 4064(%ecx), %xmm6
# INTEL: vcvtuqq2ph xmm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0xff,0x28,0x7a,0x71,0x7f

# ATT: vcvtuqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
# INTEL: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
0x62,0xf5,0xff,0xbf,0x7a,0x72,0x80

# ATT: vcvtuw2ph %xmm5, %xmm6
# INTEL: vcvtuw2ph xmm6, xmm5
0x62,0xf5,0x7f,0x08,0x7d,0xf5

# ATT: vcvtuw2ph %ymm5, %ymm6
# INTEL: vcvtuw2ph ymm6, ymm5
0x62,0xf5,0x7f,0x28,0x7d,0xf5

# ATT: vcvtuw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtuw2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7f,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtuw2ph (%ecx){1to8}, %xmm6
# INTEL: vcvtuw2ph xmm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7f,0x18,0x7d,0x31

# ATT: vcvtuw2ph 2032(%ecx), %xmm6
# INTEL: vcvtuw2ph xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7f,0x08,0x7d,0x71,0x7f

# ATT: vcvtuw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvtuw2ph xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7f,0x9f,0x7d,0x72,0x80

# ATT: vcvtuw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtuw2ph ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7f,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtuw2ph (%ecx){1to16}, %ymm6
# INTEL: vcvtuw2ph ymm6, word ptr [ecx]{1to16}
0x62,0xf5,0x7f,0x38,0x7d,0x31

# ATT: vcvtuw2ph 4064(%ecx), %ymm6
# INTEL: vcvtuw2ph ymm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7f,0x28,0x7d,0x71,0x7f

# ATT: vcvtuw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
# INTEL: vcvtuw2ph ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
0x62,0xf5,0x7f,0xbf,0x7d,0x72,0x80

# ATT: vcvtw2ph %xmm5, %xmm6
# INTEL: vcvtw2ph xmm6, xmm5
0x62,0xf5,0x7e,0x08,0x7d,0xf5

# ATT: vcvtw2ph %ymm5, %ymm6
# INTEL: vcvtw2ph ymm6, ymm5
0x62,0xf5,0x7e,0x28,0x7d,0xf5

# ATT: vcvtw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
# INTEL: vcvtw2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7e,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtw2ph (%ecx){1to8}, %xmm6
# INTEL: vcvtw2ph xmm6, word ptr [ecx]{1to8}
0x62,0xf5,0x7e,0x18,0x7d,0x31

# ATT: vcvtw2ph 2032(%ecx), %xmm6
# INTEL: vcvtw2ph xmm6, xmmword ptr [ecx + 2032]
0x62,0xf5,0x7e,0x08,0x7d,0x71,0x7f

# ATT: vcvtw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
# INTEL: vcvtw2ph xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
0x62,0xf5,0x7e,0x9f,0x7d,0x72,0x80

# ATT: vcvtw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
# INTEL: vcvtw2ph ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
0x62,0xf5,0x7e,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10

# ATT: vcvtw2ph (%ecx){1to16}, %ymm6
# INTEL: vcvtw2ph ymm6, word ptr [ecx]{1to16}
0x62,0xf5,0x7e,0x38,0x7d,0x31

# ATT: vcvtw2ph 4064(%ecx), %ymm6
# INTEL: vcvtw2ph ymm6, ymmword ptr [ecx + 4064]
0x62,0xf5,0x7e,0x28,0x7d,0x71,0x7f

# ATT: vcvtw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
# INTEL: vcvtw2ph ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
0x62,0xf5,0x7e,0xbf,0x7d,0x72,0x80
@@ -459,3 +459,899 @@
|
|||
// CHECK: vucomish -256(%rdx), %xmm30
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x2e,0x72,0x80]
|
||||
vucomish -256(%rdx), %xmm30
|
||||
|
||||
// CHECK: vcvtdq2ph %zmm29, %ymm30
|
||||
// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x5b,0xf5]
|
||||
vcvtdq2ph %zmm29, %ymm30
|
||||
|
||||
// CHECK: vcvtdq2ph {rn-sae}, %zmm29, %ymm30
|
||||
// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x5b,0xf5]
|
||||
vcvtdq2ph {rn-sae}, %zmm29, %ymm30
|
||||
|
||||
// CHECK: vcvtdq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtdq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
|
||||
|
||||
// CHECK: vcvtdq2ph (%r9){1to16}, %ymm30
|
||||
// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x5b,0x31]
|
||||
vcvtdq2ph (%r9){1to16}, %ymm30
|
||||
|
||||
// CHECK: vcvtdq2ph 8128(%rcx), %ymm30
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x5b,0x71,0x7f]
|
||||
vcvtdq2ph 8128(%rcx), %ymm30
|
||||
|
||||
// CHECK: vcvtdq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x5b,0x72,0x80]
|
||||
vcvtdq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtpd2ph %zmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x05,0xfd,0x48,0x5a,0xf5]
|
||||
vcvtpd2ph %zmm29, %xmm30
|
||||
|
||||
// CHECK: vcvtpd2ph {rn-sae}, %zmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x05,0xfd,0x18,0x5a,0xf5]
|
||||
vcvtpd2ph {rn-sae}, %zmm29, %xmm30
|
||||
|
||||
// CHECK: vcvtpd2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x25,0xfd,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtpd2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
|
||||
|
||||
// CHECK: vcvtpd2ph (%r9){1to8}, %xmm30
|
||||
// CHECK: encoding: [0x62,0x45,0xfd,0x58,0x5a,0x31]
|
||||
vcvtpd2ph (%r9){1to8}, %xmm30
|
||||
|
||||
// CHECK: vcvtpd2phz 8128(%rcx), %xmm30
|
||||
// CHECK: encoding: [0x62,0x65,0xfd,0x48,0x5a,0x71,0x7f]
|
||||
vcvtpd2phz 8128(%rcx), %xmm30
|
||||
|
||||
// CHECK: vcvtpd2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x65,0xfd,0xdf,0x5a,0x72,0x80]
vcvtpd2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}

// CHECK: vcvtph2dq %ymm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x5b,0xf5]
vcvtph2dq %ymm29, %zmm30

// CHECK: vcvtph2dq {rn-sae}, %ymm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x5b,0xf5]
vcvtph2dq {rn-sae}, %ymm29, %zmm30

// CHECK: vcvtph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtph2dq (%r9){1to16}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x5b,0x31]
vcvtph2dq (%r9){1to16}, %zmm30

// CHECK: vcvtph2dq 4064(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x5b,0x71,0x7f]
vcvtph2dq 4064(%rcx), %zmm30

// CHECK: vcvtph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x5b,0x72,0x80]
vcvtph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}

// CHECK: vcvtph2pd %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x5a,0xf5]
vcvtph2pd %xmm29, %zmm30

// CHECK: vcvtph2pd {sae}, %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x5a,0xf5]
vcvtph2pd {sae}, %xmm29, %zmm30

// CHECK: vcvtph2pd 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2pd 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtph2pd (%r9){1to8}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x5a,0x31]
vcvtph2pd (%r9){1to8}, %zmm30

// CHECK: vcvtph2pd 2032(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x5a,0x71,0x7f]
vcvtph2pd 2032(%rcx), %zmm30

// CHECK: vcvtph2pd -256(%rdx){1to8}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x5a,0x72,0x80]
vcvtph2pd -256(%rdx){1to8}, %zmm30 {%k7} {z}

// CHECK: vcvtph2psx %ymm29, %zmm30
// CHECK: encoding: [0x62,0x06,0x7d,0x48,0x13,0xf5]
vcvtph2psx %ymm29, %zmm30

// CHECK: vcvtph2psx {sae}, %ymm29, %zmm30
// CHECK: encoding: [0x62,0x06,0x7d,0x18,0x13,0xf5]
vcvtph2psx {sae}, %ymm29, %zmm30

// CHECK: vcvtph2psx 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x26,0x7d,0x4f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2psx 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtph2psx (%r9){1to16}, %zmm30
// CHECK: encoding: [0x62,0x46,0x7d,0x58,0x13,0x31]
vcvtph2psx (%r9){1to16}, %zmm30

// CHECK: vcvtph2psx 4064(%rcx), %zmm30
// CHECK: encoding: [0x62,0x66,0x7d,0x48,0x13,0x71,0x7f]
vcvtph2psx 4064(%rcx), %zmm30

// CHECK: vcvtph2psx -256(%rdx){1to16}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x66,0x7d,0xdf,0x13,0x72,0x80]
vcvtph2psx -256(%rdx){1to16}, %zmm30 {%k7} {z}

// CHECK: vcvtph2qq %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7b,0xf5]
vcvtph2qq %xmm29, %zmm30

// CHECK: vcvtph2qq {rn-sae}, %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7b,0xf5]
vcvtph2qq {rn-sae}, %xmm29, %zmm30

// CHECK: vcvtph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtph2qq (%r9){1to8}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7b,0x31]
vcvtph2qq (%r9){1to8}, %zmm30

// CHECK: vcvtph2qq 2032(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7b,0x71,0x7f]
vcvtph2qq 2032(%rcx), %zmm30

// CHECK: vcvtph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7b,0x72,0x80]
vcvtph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}

// CHECK: vcvtph2udq %ymm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x79,0xf5]
vcvtph2udq %ymm29, %zmm30

// CHECK: vcvtph2udq {rn-sae}, %ymm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x79,0xf5]
vcvtph2udq {rn-sae}, %ymm29, %zmm30

// CHECK: vcvtph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtph2udq (%r9){1to16}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x79,0x31]
vcvtph2udq (%r9){1to16}, %zmm30

// CHECK: vcvtph2udq 4064(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x79,0x71,0x7f]
vcvtph2udq 4064(%rcx), %zmm30

// CHECK: vcvtph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x79,0x72,0x80]
vcvtph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}

// CHECK: vcvtph2uqq %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x79,0xf5]
vcvtph2uqq %xmm29, %zmm30

// CHECK: vcvtph2uqq {rn-sae}, %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x79,0xf5]
vcvtph2uqq {rn-sae}, %xmm29, %zmm30

// CHECK: vcvtph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtph2uqq (%r9){1to8}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x79,0x31]
vcvtph2uqq (%r9){1to8}, %zmm30

// CHECK: vcvtph2uqq 2032(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x79,0x71,0x7f]
vcvtph2uqq 2032(%rcx), %zmm30

// CHECK: vcvtph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x79,0x72,0x80]
vcvtph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}

// CHECK: vcvtph2uw %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x7d,0xf5]
vcvtph2uw %zmm29, %zmm30

// CHECK: vcvtph2uw {rn-sae}, %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x7d,0xf5]
vcvtph2uw {rn-sae}, %zmm29, %zmm30

// CHECK: vcvtph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtph2uw (%r9){1to32}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x7d,0x31]
vcvtph2uw (%r9){1to32}, %zmm30

// CHECK: vcvtph2uw 8128(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x7d,0x71,0x7f]
vcvtph2uw 8128(%rcx), %zmm30

// CHECK: vcvtph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x7d,0x72,0x80]
vcvtph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}

// CHECK: vcvtph2w %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7d,0xf5]
vcvtph2w %zmm29, %zmm30

// CHECK: vcvtph2w {rn-sae}, %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7d,0xf5]
vcvtph2w {rn-sae}, %zmm29, %zmm30

// CHECK: vcvtph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtph2w (%r9){1to32}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7d,0x31]
vcvtph2w (%r9){1to32}, %zmm30

// CHECK: vcvtph2w 8128(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7d,0x71,0x7f]
vcvtph2w 8128(%rcx), %zmm30

// CHECK: vcvtph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7d,0x72,0x80]
vcvtph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}

// CHECK: vcvtps2phx %zmm29, %ymm30
// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x1d,0xf5]
vcvtps2phx %zmm29, %ymm30

// CHECK: vcvtps2phx {rn-sae}, %zmm29, %ymm30
// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x1d,0xf5]
vcvtps2phx {rn-sae}, %zmm29, %ymm30

// CHECK: vcvtps2phx 268435456(%rbp,%r14,8), %ymm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtps2phx 268435456(%rbp,%r14,8), %ymm30 {%k7}

// CHECK: vcvtps2phx (%r9){1to16}, %ymm30
// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x1d,0x31]
vcvtps2phx (%r9){1to16}, %ymm30

// CHECK: vcvtps2phx 8128(%rcx), %ymm30
// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x1d,0x71,0x7f]
vcvtps2phx 8128(%rcx), %ymm30

// CHECK: vcvtps2phx -512(%rdx){1to16}, %ymm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x1d,0x72,0x80]
vcvtps2phx -512(%rdx){1to16}, %ymm30 {%k7} {z}

// CHECK: vcvtqq2ph %zmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0xfc,0x48,0x5b,0xf5]
vcvtqq2ph %zmm29, %xmm30

// CHECK: vcvtqq2ph {rn-sae}, %zmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0xfc,0x18,0x5b,0xf5]
vcvtqq2ph {rn-sae}, %zmm29, %xmm30

// CHECK: vcvtqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0xfc,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}

// CHECK: vcvtqq2ph (%r9){1to8}, %xmm30
// CHECK: encoding: [0x62,0x45,0xfc,0x58,0x5b,0x31]
vcvtqq2ph (%r9){1to8}, %xmm30

// CHECK: vcvtqq2phz 8128(%rcx), %xmm30
// CHECK: encoding: [0x62,0x65,0xfc,0x48,0x5b,0x71,0x7f]
vcvtqq2phz 8128(%rcx), %xmm30

// CHECK: vcvtqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0xfc,0xdf,0x5b,0x72,0x80]
vcvtqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}

// CHECK: vcvtsd2sh %xmm28, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0x97,0x00,0x5a,0xf4]
vcvtsd2sh %xmm28, %xmm29, %xmm30

// CHECK: vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0x97,0x10,0x5a,0xf4]
vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30

// CHECK: vcvtsd2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x97,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtsd2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}

// CHECK: vcvtsd2sh (%r9), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x97,0x00,0x5a,0x31]
vcvtsd2sh (%r9), %xmm29, %xmm30

// CHECK: vcvtsd2sh 1016(%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x97,0x00,0x5a,0x71,0x7f]
vcvtsd2sh 1016(%rcx), %xmm29, %xmm30

// CHECK: vcvtsd2sh -1024(%rdx), %xmm29, %xmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x97,0x87,0x5a,0x72,0x80]
vcvtsd2sh -1024(%rdx), %xmm29, %xmm30 {%k7} {z}

// CHECK: vcvtsh2sd %xmm28, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0x16,0x00,0x5a,0xf4]
vcvtsh2sd %xmm28, %xmm29, %xmm30

// CHECK: vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0x16,0x10,0x5a,0xf4]
vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30

// CHECK: vcvtsh2sd 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x16,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtsh2sd 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}

// CHECK: vcvtsh2sd (%r9), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x16,0x00,0x5a,0x31]
vcvtsh2sd (%r9), %xmm29, %xmm30

// CHECK: vcvtsh2sd 254(%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x00,0x5a,0x71,0x7f]
vcvtsh2sd 254(%rcx), %xmm29, %xmm30

// CHECK: vcvtsh2sd -256(%rdx), %xmm29, %xmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x16,0x87,0x5a,0x72,0x80]
vcvtsh2sd -256(%rdx), %xmm29, %xmm30 {%k7} {z}

// CHECK: vcvtsh2si %xmm30, %edx
// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x2d,0xd6]
vcvtsh2si %xmm30, %edx

// CHECK: vcvtsh2si {rn-sae}, %xmm30, %edx
// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x2d,0xd6]
vcvtsh2si {rn-sae}, %xmm30, %edx

// CHECK: vcvtsh2si %xmm30, %r12
// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x2d,0xe6]
vcvtsh2si %xmm30, %r12

// CHECK: vcvtsh2si {rn-sae}, %xmm30, %r12
// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x2d,0xe6]
vcvtsh2si {rn-sae}, %xmm30, %r12

// CHECK: vcvtsh2si 268435456(%rbp,%r14,8), %edx
// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x2d,0x94,0xf5,0x00,0x00,0x00,0x10]
vcvtsh2si 268435456(%rbp,%r14,8), %edx

// CHECK: vcvtsh2si (%r9), %edx
// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x2d,0x11]
vcvtsh2si (%r9), %edx

// CHECK: vcvtsh2si 254(%rcx), %edx
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f]
vcvtsh2si 254(%rcx), %edx

// CHECK: vcvtsh2si -256(%rdx), %edx
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80]
vcvtsh2si -256(%rdx), %edx

// CHECK: vcvtsh2si 268435456(%rbp,%r14,8), %r12
// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x2d,0xa4,0xf5,0x00,0x00,0x00,0x10]
vcvtsh2si 268435456(%rbp,%r14,8), %r12

// CHECK: vcvtsh2si (%r9), %r12
// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x2d,0x21]
vcvtsh2si (%r9), %r12

// CHECK: vcvtsh2si 254(%rcx), %r12
// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2d,0x61,0x7f]
vcvtsh2si 254(%rcx), %r12

// CHECK: vcvtsh2si -256(%rdx), %r12
// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2d,0x62,0x80]
vcvtsh2si -256(%rdx), %r12

// CHECK: vcvtsh2ss %xmm28, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x06,0x14,0x00,0x13,0xf4]
vcvtsh2ss %xmm28, %xmm29, %xmm30

// CHECK: vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x06,0x14,0x10,0x13,0xf4]
vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30

// CHECK: vcvtsh2ss 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
// CHECK: encoding: [0x62,0x26,0x14,0x07,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtsh2ss 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}

// CHECK: vcvtsh2ss (%r9), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x46,0x14,0x00,0x13,0x31]
vcvtsh2ss (%r9), %xmm29, %xmm30

// CHECK: vcvtsh2ss 254(%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x66,0x14,0x00,0x13,0x71,0x7f]
vcvtsh2ss 254(%rcx), %xmm29, %xmm30

// CHECK: vcvtsh2ss -256(%rdx), %xmm29, %xmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x66,0x14,0x87,0x13,0x72,0x80]
vcvtsh2ss -256(%rdx), %xmm29, %xmm30 {%k7} {z}

// CHECK: vcvtsh2usi %xmm30, %edx
// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x79,0xd6]
vcvtsh2usi %xmm30, %edx

// CHECK: vcvtsh2usi {rn-sae}, %xmm30, %edx
// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x79,0xd6]
vcvtsh2usi {rn-sae}, %xmm30, %edx

// CHECK: vcvtsh2usi %xmm30, %r12
// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x79,0xe6]
vcvtsh2usi %xmm30, %r12

// CHECK: vcvtsh2usi {rn-sae}, %xmm30, %r12
// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x79,0xe6]
vcvtsh2usi {rn-sae}, %xmm30, %r12

// CHECK: vcvtsh2usi 268435456(%rbp,%r14,8), %edx
// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x79,0x94,0xf5,0x00,0x00,0x00,0x10]
vcvtsh2usi 268435456(%rbp,%r14,8), %edx

// CHECK: vcvtsh2usi (%r9), %edx
// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x79,0x11]
vcvtsh2usi (%r9), %edx

// CHECK: vcvtsh2usi 254(%rcx), %edx
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f]
vcvtsh2usi 254(%rcx), %edx

// CHECK: vcvtsh2usi -256(%rdx), %edx
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x52,0x80]
vcvtsh2usi -256(%rdx), %edx

// CHECK: vcvtsh2usi 268435456(%rbp,%r14,8), %r12
// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x79,0xa4,0xf5,0x00,0x00,0x00,0x10]
vcvtsh2usi 268435456(%rbp,%r14,8), %r12

// CHECK: vcvtsh2usi (%r9), %r12
// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x79,0x21]
vcvtsh2usi (%r9), %r12

// CHECK: vcvtsh2usi 254(%rcx), %r12
// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x79,0x61,0x7f]
vcvtsh2usi 254(%rcx), %r12

// CHECK: vcvtsh2usi -256(%rdx), %r12
// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x79,0x62,0x80]
vcvtsh2usi -256(%rdx), %r12

// CHECK: vcvtsi2sh %r12, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x96,0x00,0x2a,0xf4]
vcvtsi2sh %r12, %xmm29, %xmm30

// CHECK: vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x96,0x10,0x2a,0xf4]
vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30

// CHECK: vcvtsi2sh %edx, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0xf2]
vcvtsi2sh %edx, %xmm29, %xmm30

// CHECK: vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x10,0x2a,0xf2]
vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30

// CHECK: vcvtsi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x25,0x16,0x00,0x2a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtsi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30

// CHECK: vcvtsi2shl (%r9), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x16,0x00,0x2a,0x31]
vcvtsi2shl (%r9), %xmm29, %xmm30

// CHECK: vcvtsi2shl 508(%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0x71,0x7f]
vcvtsi2shl 508(%rcx), %xmm29, %xmm30

// CHECK: vcvtsi2shl -512(%rdx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0x72,0x80]
vcvtsi2shl -512(%rdx), %xmm29, %xmm30

// CHECK: vcvtsi2shq 1016(%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x96,0x00,0x2a,0x71,0x7f]
vcvtsi2shq 1016(%rcx), %xmm29, %xmm30

// CHECK: vcvtsi2shq -1024(%rdx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x96,0x00,0x2a,0x72,0x80]
vcvtsi2shq -1024(%rdx), %xmm29, %xmm30

// CHECK: vcvtss2sh %xmm28, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0x14,0x00,0x1d,0xf4]
vcvtss2sh %xmm28, %xmm29, %xmm30

// CHECK: vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0x14,0x10,0x1d,0xf4]
vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30

// CHECK: vcvtss2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x14,0x07,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtss2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}

// CHECK: vcvtss2sh (%r9), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x14,0x00,0x1d,0x31]
vcvtss2sh (%r9), %xmm29, %xmm30

// CHECK: vcvtss2sh 508(%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x14,0x00,0x1d,0x71,0x7f]
vcvtss2sh 508(%rcx), %xmm29, %xmm30

// CHECK: vcvtss2sh -512(%rdx), %xmm29, %xmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x14,0x87,0x1d,0x72,0x80]
vcvtss2sh -512(%rdx), %xmm29, %xmm30 {%k7} {z}

// CHECK: vcvttph2dq %ymm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7e,0x48,0x5b,0xf5]
vcvttph2dq %ymm29, %zmm30

// CHECK: vcvttph2dq {sae}, %ymm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7e,0x18,0x5b,0xf5]
vcvttph2dq {sae}, %ymm29, %zmm30

// CHECK: vcvttph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7e,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvttph2dq (%r9){1to16}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7e,0x58,0x5b,0x31]
vcvttph2dq (%r9){1to16}, %zmm30

// CHECK: vcvttph2dq 4064(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7e,0x48,0x5b,0x71,0x7f]
vcvttph2dq 4064(%rcx), %zmm30

// CHECK: vcvttph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7e,0xdf,0x5b,0x72,0x80]
vcvttph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}

// CHECK: vcvttph2qq %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7a,0xf5]
vcvttph2qq %xmm29, %zmm30

// CHECK: vcvttph2qq {sae}, %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7a,0xf5]
vcvttph2qq {sae}, %xmm29, %zmm30

// CHECK: vcvttph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvttph2qq (%r9){1to8}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7a,0x31]
vcvttph2qq (%r9){1to8}, %zmm30

// CHECK: vcvttph2qq 2032(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7a,0x71,0x7f]
vcvttph2qq 2032(%rcx), %zmm30

// CHECK: vcvttph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7a,0x72,0x80]
vcvttph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}

// CHECK: vcvttph2udq %ymm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x78,0xf5]
vcvttph2udq %ymm29, %zmm30

// CHECK: vcvttph2udq {sae}, %ymm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x78,0xf5]
vcvttph2udq {sae}, %ymm29, %zmm30

// CHECK: vcvttph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvttph2udq (%r9){1to16}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x78,0x31]
vcvttph2udq (%r9){1to16}, %zmm30

// CHECK: vcvttph2udq 4064(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x78,0x71,0x7f]
vcvttph2udq 4064(%rcx), %zmm30

// CHECK: vcvttph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x78,0x72,0x80]
vcvttph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}

// CHECK: vcvttph2uqq %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x78,0xf5]
vcvttph2uqq %xmm29, %zmm30

// CHECK: vcvttph2uqq {sae}, %xmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x78,0xf5]
vcvttph2uqq {sae}, %xmm29, %zmm30

// CHECK: vcvttph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvttph2uqq (%r9){1to8}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x78,0x31]
vcvttph2uqq (%r9){1to8}, %zmm30

// CHECK: vcvttph2uqq 2032(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x78,0x71,0x7f]
vcvttph2uqq 2032(%rcx), %zmm30

// CHECK: vcvttph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x78,0x72,0x80]
vcvttph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}

// CHECK: vcvttph2uw %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x7c,0xf5]
vcvttph2uw %zmm29, %zmm30

// CHECK: vcvttph2uw {sae}, %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x7c,0xf5]
vcvttph2uw {sae}, %zmm29, %zmm30

// CHECK: vcvttph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvttph2uw (%r9){1to32}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x7c,0x31]
vcvttph2uw (%r9){1to32}, %zmm30

// CHECK: vcvttph2uw 8128(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x7c,0x71,0x7f]
vcvttph2uw 8128(%rcx), %zmm30

// CHECK: vcvttph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x7c,0x72,0x80]
vcvttph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}

// CHECK: vcvttph2w %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7c,0xf5]
vcvttph2w %zmm29, %zmm30

// CHECK: vcvttph2w {sae}, %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7c,0xf5]
vcvttph2w {sae}, %zmm29, %zmm30

// CHECK: vcvttph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvttph2w (%r9){1to32}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7c,0x31]
vcvttph2w (%r9){1to32}, %zmm30

// CHECK: vcvttph2w 8128(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7c,0x71,0x7f]
vcvttph2w 8128(%rcx), %zmm30

// CHECK: vcvttph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7c,0x72,0x80]
vcvttph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}

// CHECK: vcvttsh2si %xmm30, %edx
// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x2c,0xd6]
vcvttsh2si %xmm30, %edx

// CHECK: vcvttsh2si {sae}, %xmm30, %edx
// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x2c,0xd6]
vcvttsh2si {sae}, %xmm30, %edx

// CHECK: vcvttsh2si %xmm30, %r12
// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x2c,0xe6]
vcvttsh2si %xmm30, %r12

// CHECK: vcvttsh2si {sae}, %xmm30, %r12
// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x2c,0xe6]
vcvttsh2si {sae}, %xmm30, %r12

// CHECK: vcvttsh2si 268435456(%rbp,%r14,8), %edx
// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x2c,0x94,0xf5,0x00,0x00,0x00,0x10]
vcvttsh2si 268435456(%rbp,%r14,8), %edx

// CHECK: vcvttsh2si (%r9), %edx
// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x2c,0x11]
vcvttsh2si (%r9), %edx

// CHECK: vcvttsh2si 254(%rcx), %edx
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f]
vcvttsh2si 254(%rcx), %edx

// CHECK: vcvttsh2si -256(%rdx), %edx
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80]
vcvttsh2si -256(%rdx), %edx

// CHECK: vcvttsh2si 268435456(%rbp,%r14,8), %r12
// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x2c,0xa4,0xf5,0x00,0x00,0x00,0x10]
vcvttsh2si 268435456(%rbp,%r14,8), %r12

// CHECK: vcvttsh2si (%r9), %r12
// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x2c,0x21]
vcvttsh2si (%r9), %r12

// CHECK: vcvttsh2si 254(%rcx), %r12
// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2c,0x61,0x7f]
vcvttsh2si 254(%rcx), %r12

// CHECK: vcvttsh2si -256(%rdx), %r12
// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2c,0x62,0x80]
vcvttsh2si -256(%rdx), %r12

// CHECK: vcvttsh2usi %xmm30, %edx
// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x78,0xd6]
vcvttsh2usi %xmm30, %edx

// CHECK: vcvttsh2usi {sae}, %xmm30, %edx
// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x78,0xd6]
vcvttsh2usi {sae}, %xmm30, %edx

// CHECK: vcvttsh2usi %xmm30, %r12
// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x78,0xe6]
vcvttsh2usi %xmm30, %r12

// CHECK: vcvttsh2usi {sae}, %xmm30, %r12
// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x78,0xe6]
vcvttsh2usi {sae}, %xmm30, %r12

// CHECK: vcvttsh2usi 268435456(%rbp,%r14,8), %edx
// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x78,0x94,0xf5,0x00,0x00,0x00,0x10]
vcvttsh2usi 268435456(%rbp,%r14,8), %edx

// CHECK: vcvttsh2usi (%r9), %edx
// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x78,0x11]
vcvttsh2usi (%r9), %edx

// CHECK: vcvttsh2usi 254(%rcx), %edx
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f]
vcvttsh2usi 254(%rcx), %edx

// CHECK: vcvttsh2usi -256(%rdx), %edx
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x52,0x80]
vcvttsh2usi -256(%rdx), %edx

// CHECK: vcvttsh2usi 268435456(%rbp,%r14,8), %r12
// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x78,0xa4,0xf5,0x00,0x00,0x00,0x10]
vcvttsh2usi 268435456(%rbp,%r14,8), %r12

// CHECK: vcvttsh2usi (%r9), %r12
// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x78,0x21]
vcvttsh2usi (%r9), %r12

// CHECK: vcvttsh2usi 254(%rcx), %r12
// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x78,0x61,0x7f]
vcvttsh2usi 254(%rcx), %r12

// CHECK: vcvttsh2usi -256(%rdx), %r12
// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x78,0x62,0x80]
vcvttsh2usi -256(%rdx), %r12

// CHECK: vcvtudq2ph %zmm29, %ymm30
// CHECK: encoding: [0x62,0x05,0x7f,0x48,0x7a,0xf5]
vcvtudq2ph %zmm29, %ymm30

// CHECK: vcvtudq2ph {rn-sae}, %zmm29, %ymm30
// CHECK: encoding: [0x62,0x05,0x7f,0x18,0x7a,0xf5]
vcvtudq2ph {rn-sae}, %zmm29, %ymm30

// CHECK: vcvtudq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7f,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtudq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}

// CHECK: vcvtudq2ph (%r9){1to16}, %ymm30
// CHECK: encoding: [0x62,0x45,0x7f,0x58,0x7a,0x31]
vcvtudq2ph (%r9){1to16}, %ymm30

// CHECK: vcvtudq2ph 8128(%rcx), %ymm30
// CHECK: encoding: [0x62,0x65,0x7f,0x48,0x7a,0x71,0x7f]
vcvtudq2ph 8128(%rcx), %ymm30

// CHECK: vcvtudq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7f,0xdf,0x7a,0x72,0x80]
vcvtudq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}

// CHECK: vcvtuqq2ph %zmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0xff,0x48,0x7a,0xf5]
vcvtuqq2ph %zmm29, %xmm30

// CHECK: vcvtuqq2ph {rn-sae}, %zmm29, %xmm30
// CHECK: encoding: [0x62,0x05,0xff,0x18,0x7a,0xf5]
vcvtuqq2ph {rn-sae}, %zmm29, %xmm30

// CHECK: vcvtuqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0xff,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtuqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}

// CHECK: vcvtuqq2ph (%r9){1to8}, %xmm30
// CHECK: encoding: [0x62,0x45,0xff,0x58,0x7a,0x31]
vcvtuqq2ph (%r9){1to8}, %xmm30

// CHECK: vcvtuqq2phz 8128(%rcx), %xmm30
// CHECK: encoding: [0x62,0x65,0xff,0x48,0x7a,0x71,0x7f]
vcvtuqq2phz 8128(%rcx), %xmm30

// CHECK: vcvtuqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0xff,0xdf,0x7a,0x72,0x80]
vcvtuqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}

// CHECK: vcvtusi2sh %r12, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x96,0x00,0x7b,0xf4]
vcvtusi2sh %r12, %xmm29, %xmm30

// CHECK: vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x96,0x10,0x7b,0xf4]
vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30

// CHECK: vcvtusi2sh %edx, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0xf2]
vcvtusi2sh %edx, %xmm29, %xmm30

// CHECK: vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x10,0x7b,0xf2]
vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30

// CHECK: vcvtusi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x25,0x16,0x00,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtusi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30

// CHECK: vcvtusi2shl (%r9), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x45,0x16,0x00,0x7b,0x31]
vcvtusi2shl (%r9), %xmm29, %xmm30

// CHECK: vcvtusi2shl 508(%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0x71,0x7f]
vcvtusi2shl 508(%rcx), %xmm29, %xmm30

// CHECK: vcvtusi2shl -512(%rdx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0x72,0x80]
vcvtusi2shl -512(%rdx), %xmm29, %xmm30

// CHECK: vcvtusi2shq 1016(%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x96,0x00,0x7b,0x71,0x7f]
vcvtusi2shq 1016(%rcx), %xmm29, %xmm30

// CHECK: vcvtusi2shq -1024(%rdx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x65,0x96,0x00,0x7b,0x72,0x80]
vcvtusi2shq -1024(%rdx), %xmm29, %xmm30

// CHECK: vcvtuw2ph %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7f,0x48,0x7d,0xf5]
vcvtuw2ph %zmm29, %zmm30

// CHECK: vcvtuw2ph {rn-sae}, %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7f,0x18,0x7d,0xf5]
vcvtuw2ph {rn-sae}, %zmm29, %zmm30

// CHECK: vcvtuw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7f,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtuw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtuw2ph (%r9){1to32}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7f,0x58,0x7d,0x31]
vcvtuw2ph (%r9){1to32}, %zmm30

// CHECK: vcvtuw2ph 8128(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7f,0x48,0x7d,0x71,0x7f]
vcvtuw2ph 8128(%rcx), %zmm30

// CHECK: vcvtuw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7f,0xdf,0x7d,0x72,0x80]
vcvtuw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}

// CHECK: vcvtw2ph %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7e,0x48,0x7d,0xf5]
vcvtw2ph %zmm29, %zmm30

// CHECK: vcvtw2ph {rn-sae}, %zmm29, %zmm30
// CHECK: encoding: [0x62,0x05,0x7e,0x18,0x7d,0xf5]
vcvtw2ph {rn-sae}, %zmm29, %zmm30

// CHECK: vcvtw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
// CHECK: encoding: [0x62,0x25,0x7e,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}

// CHECK: vcvtw2ph (%r9){1to32}, %zmm30
// CHECK: encoding: [0x62,0x45,0x7e,0x58,0x7d,0x31]
vcvtw2ph (%r9){1to32}, %zmm30

// CHECK: vcvtw2ph 8128(%rcx), %zmm30
// CHECK: encoding: [0x62,0x65,0x7e,0x48,0x7d,0x71,0x7f]
vcvtw2ph 8128(%rcx), %zmm30

// CHECK: vcvtw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
// CHECK: encoding: [0x62,0x65,0x7e,0xdf,0x7d,0x72,0x80]
vcvtw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
@ -279,3 +279,859 @@
// CHECK: vsubph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x54,0x9f,0x5c,0x72,0x80]
vsubph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}

// CHECK: vcvtdq2ph %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5b,0xf5]
vcvtdq2ph %xmm5, %xmm6

// CHECK: vcvtdq2ph %ymm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5b,0xf5]
vcvtdq2ph %ymm5, %xmm6

// CHECK: vcvtdq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtdq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtdq2ph (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5b,0x31]
vcvtdq2ph (%ecx){1to4}, %xmm6

// CHECK: vcvtdq2phx 2032(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5b,0x71,0x7f]
vcvtdq2phx 2032(%ecx), %xmm6

// CHECK: vcvtdq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x5b,0x72,0x80]
vcvtdq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvtdq2ph (%ecx){1to8}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x5b,0x31]
vcvtdq2ph (%ecx){1to8}, %xmm6

// CHECK: vcvtdq2phy 4064(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5b,0x71,0x7f]
vcvtdq2phy 4064(%ecx), %xmm6

// CHECK: vcvtdq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x5b,0x72,0x80]
vcvtdq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}

// CHECK: vcvtpd2ph %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x5a,0xf5]
vcvtpd2ph %xmm5, %xmm6

// CHECK: vcvtpd2ph %ymm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x5a,0xf5]
vcvtpd2ph %ymm5, %xmm6

// CHECK: vcvtpd2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtpd2phx 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtpd2ph (%ecx){1to2}, %xmm6
// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x5a,0x31]
vcvtpd2ph (%ecx){1to2}, %xmm6

// CHECK: vcvtpd2phx 2032(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x5a,0x71,0x7f]
vcvtpd2phx 2032(%ecx), %xmm6

// CHECK: vcvtpd2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x5a,0x72,0x80]
vcvtpd2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}

// CHECK: vcvtpd2ph (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf5,0xfd,0x38,0x5a,0x31]
vcvtpd2ph (%ecx){1to4}, %xmm6

// CHECK: vcvtpd2phy 4064(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x5a,0x71,0x7f]
vcvtpd2phy 4064(%ecx), %xmm6

// CHECK: vcvtpd2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0xfd,0xbf,0x5a,0x72,0x80]
vcvtpd2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvtph2dq %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x5b,0xf5]
vcvtph2dq %xmm5, %xmm6

// CHECK: vcvtph2dq %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x5b,0xf5]
vcvtph2dq %xmm5, %ymm6

// CHECK: vcvtph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtph2dq (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x5b,0x31]
vcvtph2dq (%ecx){1to4}, %xmm6

// CHECK: vcvtph2dq 1016(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x5b,0x71,0x7f]
vcvtph2dq 1016(%ecx), %xmm6

// CHECK: vcvtph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x5b,0x72,0x80]
vcvtph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvtph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvtph2dq (%ecx){1to8}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x5b,0x31]
vcvtph2dq (%ecx){1to8}, %ymm6

// CHECK: vcvtph2dq 2032(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x5b,0x71,0x7f]
vcvtph2dq 2032(%ecx), %ymm6

// CHECK: vcvtph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x5b,0x72,0x80]
vcvtph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}

// CHECK: vcvtph2pd %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5a,0xf5]
vcvtph2pd %xmm5, %xmm6

// CHECK: vcvtph2pd %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5a,0xf5]
vcvtph2pd %xmm5, %ymm6

// CHECK: vcvtph2pd 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2pd 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtph2pd (%ecx){1to2}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5a,0x31]
vcvtph2pd (%ecx){1to2}, %xmm6

// CHECK: vcvtph2pd 508(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5a,0x71,0x7f]
vcvtph2pd 508(%ecx), %xmm6

// CHECK: vcvtph2pd -256(%edx){1to2}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x5a,0x72,0x80]
vcvtph2pd -256(%edx){1to2}, %xmm6 {%k7} {z}

// CHECK: vcvtph2pd 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2pd 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvtph2pd (%ecx){1to4}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x5a,0x31]
vcvtph2pd (%ecx){1to4}, %ymm6

// CHECK: vcvtph2pd 1016(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5a,0x71,0x7f]
vcvtph2pd 1016(%ecx), %ymm6

// CHECK: vcvtph2pd -256(%edx){1to4}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x5a,0x72,0x80]
vcvtph2pd -256(%edx){1to4}, %ymm6 {%k7} {z}

// CHECK: vcvtph2psx %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf6,0x7d,0x08,0x13,0xf5]
vcvtph2psx %xmm5, %xmm6

// CHECK: vcvtph2psx %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf6,0x7d,0x28,0x13,0xf5]
vcvtph2psx %xmm5, %ymm6

// CHECK: vcvtph2psx 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf6,0x7d,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2psx 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtph2psx (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf6,0x7d,0x18,0x13,0x31]
vcvtph2psx (%ecx){1to4}, %xmm6

// CHECK: vcvtph2psx 1016(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf6,0x7d,0x08,0x13,0x71,0x7f]
vcvtph2psx 1016(%ecx), %xmm6

// CHECK: vcvtph2psx -256(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf6,0x7d,0x9f,0x13,0x72,0x80]
vcvtph2psx -256(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvtph2psx 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf6,0x7d,0x2f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2psx 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvtph2psx (%ecx){1to8}, %ymm6
// CHECK: encoding: [0x62,0xf6,0x7d,0x38,0x13,0x31]
vcvtph2psx (%ecx){1to8}, %ymm6

// CHECK: vcvtph2psx 2032(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf6,0x7d,0x28,0x13,0x71,0x7f]
vcvtph2psx 2032(%ecx), %ymm6

// CHECK: vcvtph2psx -256(%edx){1to8}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf6,0x7d,0xbf,0x13,0x72,0x80]
vcvtph2psx -256(%edx){1to8}, %ymm6 {%k7} {z}

// CHECK: vcvtph2qq %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7b,0xf5]
vcvtph2qq %xmm5, %xmm6

// CHECK: vcvtph2qq %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7b,0xf5]
vcvtph2qq %xmm5, %ymm6

// CHECK: vcvtph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtph2qq (%ecx){1to2}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7b,0x31]
vcvtph2qq (%ecx){1to2}, %xmm6

// CHECK: vcvtph2qq 508(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7b,0x71,0x7f]
vcvtph2qq 508(%ecx), %xmm6

// CHECK: vcvtph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7b,0x72,0x80]
vcvtph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}

// CHECK: vcvtph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvtph2qq (%ecx){1to4}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7b,0x31]
vcvtph2qq (%ecx){1to4}, %ymm6

// CHECK: vcvtph2qq 1016(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7b,0x71,0x7f]
vcvtph2qq 1016(%ecx), %ymm6

// CHECK: vcvtph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7b,0x72,0x80]
vcvtph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}

// CHECK: vcvtph2udq %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x79,0xf5]
vcvtph2udq %xmm5, %xmm6

// CHECK: vcvtph2udq %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x79,0xf5]
vcvtph2udq %xmm5, %ymm6

// CHECK: vcvtph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtph2udq (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x79,0x31]
vcvtph2udq (%ecx){1to4}, %xmm6

// CHECK: vcvtph2udq 1016(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x79,0x71,0x7f]
vcvtph2udq 1016(%ecx), %xmm6

// CHECK: vcvtph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x79,0x72,0x80]
vcvtph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvtph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvtph2udq (%ecx){1to8}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x79,0x31]
vcvtph2udq (%ecx){1to8}, %ymm6

// CHECK: vcvtph2udq 2032(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x79,0x71,0x7f]
vcvtph2udq 2032(%ecx), %ymm6

// CHECK: vcvtph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x79,0x72,0x80]
vcvtph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}

// CHECK: vcvtph2uqq %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x79,0xf5]
vcvtph2uqq %xmm5, %xmm6

// CHECK: vcvtph2uqq %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x79,0xf5]
vcvtph2uqq %xmm5, %ymm6

// CHECK: vcvtph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtph2uqq (%ecx){1to2}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x79,0x31]
vcvtph2uqq (%ecx){1to2}, %xmm6

// CHECK: vcvtph2uqq 508(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x79,0x71,0x7f]
vcvtph2uqq 508(%ecx), %xmm6

// CHECK: vcvtph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x79,0x72,0x80]
vcvtph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}

// CHECK: vcvtph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvtph2uqq (%ecx){1to4}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x79,0x31]
vcvtph2uqq (%ecx){1to4}, %ymm6

// CHECK: vcvtph2uqq 1016(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x79,0x71,0x7f]
vcvtph2uqq 1016(%ecx), %ymm6

// CHECK: vcvtph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x79,0x72,0x80]
vcvtph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}

// CHECK: vcvtph2uw %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7d,0xf5]
vcvtph2uw %xmm5, %xmm6

// CHECK: vcvtph2uw %ymm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7d,0xf5]
vcvtph2uw %ymm5, %ymm6

// CHECK: vcvtph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtph2uw (%ecx){1to8}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7d,0x31]
vcvtph2uw (%ecx){1to8}, %xmm6

// CHECK: vcvtph2uw 2032(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7d,0x71,0x7f]
vcvtph2uw 2032(%ecx), %xmm6

// CHECK: vcvtph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x7d,0x72,0x80]
vcvtph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}

// CHECK: vcvtph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvtph2uw (%ecx){1to16}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x7d,0x31]
vcvtph2uw (%ecx){1to16}, %ymm6

// CHECK: vcvtph2uw 4064(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7d,0x71,0x7f]
vcvtph2uw 4064(%ecx), %ymm6

// CHECK: vcvtph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x7d,0x72,0x80]
vcvtph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}

// CHECK: vcvtph2w %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7d,0xf5]
vcvtph2w %xmm5, %xmm6

// CHECK: vcvtph2w %ymm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7d,0xf5]
vcvtph2w %ymm5, %ymm6

// CHECK: vcvtph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2w 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtph2w (%ecx){1to8}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7d,0x31]
vcvtph2w (%ecx){1to8}, %xmm6

// CHECK: vcvtph2w 2032(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7d,0x71,0x7f]
vcvtph2w 2032(%ecx), %xmm6

// CHECK: vcvtph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7d,0x72,0x80]
vcvtph2w -256(%edx){1to8}, %xmm6 {%k7} {z}

// CHECK: vcvtph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2w 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvtph2w (%ecx){1to16}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7d,0x31]
vcvtph2w (%ecx){1to16}, %ymm6

// CHECK: vcvtph2w 4064(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7d,0x71,0x7f]
vcvtph2w 4064(%ecx), %ymm6

// CHECK: vcvtph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7d,0x72,0x80]
vcvtph2w -256(%edx){1to16}, %ymm6 {%k7} {z}

// CHECK: vcvtps2phx %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x1d,0xf5]
vcvtps2phx %xmm5, %xmm6

// CHECK: vcvtps2phx %ymm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x1d,0xf5]
vcvtps2phx %ymm5, %xmm6

// CHECK: vcvtps2phxx 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtps2phxx 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtps2phx (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x1d,0x31]
vcvtps2phx (%ecx){1to4}, %xmm6

// CHECK: vcvtps2phxx 2032(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x1d,0x71,0x7f]
vcvtps2phxx 2032(%ecx), %xmm6

// CHECK: vcvtps2phx -512(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x1d,0x72,0x80]
vcvtps2phx -512(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvtps2phx (%ecx){1to8}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x1d,0x31]
vcvtps2phx (%ecx){1to8}, %xmm6

// CHECK: vcvtps2phxy 4064(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x1d,0x71,0x7f]
vcvtps2phxy 4064(%ecx), %xmm6

// CHECK: vcvtps2phx -512(%edx){1to8}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x1d,0x72,0x80]
vcvtps2phx -512(%edx){1to8}, %xmm6 {%k7} {z}

// CHECK: vcvtqq2ph %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x5b,0xf5]
vcvtqq2ph %xmm5, %xmm6

// CHECK: vcvtqq2ph %ymm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x5b,0xf5]
vcvtqq2ph %ymm5, %xmm6

// CHECK: vcvtqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvtqq2ph (%ecx){1to2}, %xmm6
// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x5b,0x31]
vcvtqq2ph (%ecx){1to2}, %xmm6

// CHECK: vcvtqq2phx 2032(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x5b,0x71,0x7f]
vcvtqq2phx 2032(%ecx), %xmm6

// CHECK: vcvtqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x5b,0x72,0x80]
vcvtqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}

// CHECK: vcvtqq2ph (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf5,0xfc,0x38,0x5b,0x31]
vcvtqq2ph (%ecx){1to4}, %xmm6

// CHECK: vcvtqq2phy 4064(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x5b,0x71,0x7f]
vcvtqq2phy 4064(%ecx), %xmm6

// CHECK: vcvtqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0xfc,0xbf,0x5b,0x72,0x80]
vcvtqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvttph2dq %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x5b,0xf5]
vcvttph2dq %xmm5, %xmm6

// CHECK: vcvttph2dq %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x5b,0xf5]
vcvttph2dq %xmm5, %ymm6

// CHECK: vcvttph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7e,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvttph2dq (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x5b,0x31]
vcvttph2dq (%ecx){1to4}, %xmm6

// CHECK: vcvttph2dq 1016(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x5b,0x71,0x7f]
vcvttph2dq 1016(%ecx), %xmm6

// CHECK: vcvttph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7e,0x9f,0x5b,0x72,0x80]
vcvttph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvttph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7e,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvttph2dq (%ecx){1to8}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x38,0x5b,0x31]
vcvttph2dq (%ecx){1to8}, %ymm6

// CHECK: vcvttph2dq 2032(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x5b,0x71,0x7f]
vcvttph2dq 2032(%ecx), %ymm6

// CHECK: vcvttph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7e,0xbf,0x5b,0x72,0x80]
vcvttph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}

// CHECK: vcvttph2qq %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7a,0xf5]
vcvttph2qq %xmm5, %xmm6

// CHECK: vcvttph2qq %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7a,0xf5]
vcvttph2qq %xmm5, %ymm6

// CHECK: vcvttph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvttph2qq (%ecx){1to2}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7a,0x31]
vcvttph2qq (%ecx){1to2}, %xmm6

// CHECK: vcvttph2qq 508(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7a,0x71,0x7f]
vcvttph2qq 508(%ecx), %xmm6

// CHECK: vcvttph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7a,0x72,0x80]
vcvttph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}

// CHECK: vcvttph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvttph2qq (%ecx){1to4}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7a,0x31]
vcvttph2qq (%ecx){1to4}, %ymm6

// CHECK: vcvttph2qq 1016(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7a,0x71,0x7f]
vcvttph2qq 1016(%ecx), %ymm6

// CHECK: vcvttph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7a,0x72,0x80]
vcvttph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}

// CHECK: vcvttph2udq %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x78,0xf5]
vcvttph2udq %xmm5, %xmm6

// CHECK: vcvttph2udq %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x78,0xf5]
vcvttph2udq %xmm5, %ymm6

// CHECK: vcvttph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvttph2udq (%ecx){1to4}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x78,0x31]
vcvttph2udq (%ecx){1to4}, %xmm6

// CHECK: vcvttph2udq 1016(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x78,0x71,0x7f]
vcvttph2udq 1016(%ecx), %xmm6

// CHECK: vcvttph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x78,0x72,0x80]
vcvttph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}

// CHECK: vcvttph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvttph2udq (%ecx){1to8}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x78,0x31]
vcvttph2udq (%ecx){1to8}, %ymm6

// CHECK: vcvttph2udq 2032(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x78,0x71,0x7f]
vcvttph2udq 2032(%ecx), %ymm6

// CHECK: vcvttph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x78,0x72,0x80]
vcvttph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}

// CHECK: vcvttph2uqq %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x78,0xf5]
vcvttph2uqq %xmm5, %xmm6

// CHECK: vcvttph2uqq %xmm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x78,0xf5]
vcvttph2uqq %xmm5, %ymm6

// CHECK: vcvttph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvttph2uqq (%ecx){1to2}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x78,0x31]
vcvttph2uqq (%ecx){1to2}, %xmm6

// CHECK: vcvttph2uqq 508(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x78,0x71,0x7f]
vcvttph2uqq 508(%ecx), %xmm6

// CHECK: vcvttph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x78,0x72,0x80]
vcvttph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}

// CHECK: vcvttph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvttph2uqq (%ecx){1to4}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x78,0x31]
vcvttph2uqq (%ecx){1to4}, %ymm6

// CHECK: vcvttph2uqq 1016(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x78,0x71,0x7f]
vcvttph2uqq 1016(%ecx), %ymm6

// CHECK: vcvttph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x78,0x72,0x80]
vcvttph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}

// CHECK: vcvttph2uw %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7c,0xf5]
vcvttph2uw %xmm5, %xmm6

// CHECK: vcvttph2uw %ymm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7c,0xf5]
vcvttph2uw %ymm5, %ymm6

// CHECK: vcvttph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvttph2uw (%ecx){1to8}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7c,0x31]
vcvttph2uw (%ecx){1to8}, %xmm6

// CHECK: vcvttph2uw 2032(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7c,0x71,0x7f]
vcvttph2uw 2032(%ecx), %xmm6

// CHECK: vcvttph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x7c,0x72,0x80]
vcvttph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}

// CHECK: vcvttph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}

// CHECK: vcvttph2uw (%ecx){1to16}, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x7c,0x31]
vcvttph2uw (%ecx){1to16}, %ymm6

// CHECK: vcvttph2uw 4064(%ecx), %ymm6
// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7c,0x71,0x7f]
vcvttph2uw 4064(%ecx), %ymm6

// CHECK: vcvttph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x7c,0x72,0x80]
vcvttph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}

// CHECK: vcvttph2w %xmm5, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7c,0xf5]
vcvttph2w %xmm5, %xmm6

// CHECK: vcvttph2w %ymm5, %ymm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7c,0xf5]
vcvttph2w %ymm5, %ymm6

// CHECK: vcvttph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2w 268435456(%esp,%esi,8), %xmm6 {%k7}

// CHECK: vcvttph2w (%ecx){1to8}, %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7c,0x31]
vcvttph2w (%ecx){1to8}, %xmm6

// CHECK: vcvttph2w 2032(%ecx), %xmm6
// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7c,0x71,0x7f]
vcvttph2w 2032(%ecx), %xmm6

// CHECK: vcvttph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7c,0x72,0x80]
vcvttph2w -256(%edx){1to8}, %xmm6 {%k7} {z}

// CHECK: vcvttph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vcvttph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
|
||||
|
||||
// CHECK: vcvttph2w (%ecx){1to16}, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7c,0x31]
|
||||
vcvttph2w (%ecx){1to16}, %ymm6
|
||||
|
||||
// CHECK: vcvttph2w 4064(%ecx), %ymm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7c,0x71,0x7f]
|
||||
vcvttph2w 4064(%ecx), %ymm6
|
||||
|
||||
// CHECK: vcvttph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7c,0x72,0x80]
|
||||
vcvttph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtudq2ph %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7a,0xf5]
|
||||
vcvtudq2ph %xmm5, %xmm6
|
||||
|
||||
// CHECK: vcvtudq2ph %ymm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7a,0xf5]
|
||||
vcvtudq2ph %ymm5, %xmm6
|
||||
|
||||
// CHECK: vcvtudq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vcvtudq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
|
||||
|
||||
// CHECK: vcvtudq2ph (%ecx){1to4}, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7a,0x31]
|
||||
vcvtudq2ph (%ecx){1to4}, %xmm6
|
||||
|
||||
// CHECK: vcvtudq2phx 2032(%ecx), %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7a,0x71,0x7f]
|
||||
vcvtudq2phx 2032(%ecx), %xmm6
|
||||
|
||||
// CHECK: vcvtudq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x9f,0x7a,0x72,0x80]
|
||||
vcvtudq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtudq2ph (%ecx){1to8}, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x38,0x7a,0x31]
|
||||
vcvtudq2ph (%ecx){1to8}, %xmm6
|
||||
|
||||
// CHECK: vcvtudq2phy 4064(%ecx), %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7a,0x71,0x7f]
|
||||
vcvtudq2phy 4064(%ecx), %xmm6
|
||||
|
||||
// CHECK: vcvtudq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0xbf,0x7a,0x72,0x80]
|
||||
vcvtudq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtuqq2ph %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0x08,0x7a,0xf5]
|
||||
vcvtuqq2ph %xmm5, %xmm6
|
||||
|
||||
// CHECK: vcvtuqq2ph %ymm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0x28,0x7a,0xf5]
|
||||
vcvtuqq2ph %ymm5, %xmm6
|
||||
|
||||
// CHECK: vcvtuqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vcvtuqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
|
||||
|
||||
// CHECK: vcvtuqq2ph (%ecx){1to2}, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0x18,0x7a,0x31]
|
||||
vcvtuqq2ph (%ecx){1to2}, %xmm6
|
||||
|
||||
// CHECK: vcvtuqq2phx 2032(%ecx), %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0x08,0x7a,0x71,0x7f]
|
||||
vcvtuqq2phx 2032(%ecx), %xmm6
|
||||
|
||||
// CHECK: vcvtuqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0x9f,0x7a,0x72,0x80]
|
||||
vcvtuqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtuqq2ph (%ecx){1to4}, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0x38,0x7a,0x31]
|
||||
vcvtuqq2ph (%ecx){1to4}, %xmm6
|
||||
|
||||
// CHECK: vcvtuqq2phy 4064(%ecx), %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0x28,0x7a,0x71,0x7f]
|
||||
vcvtuqq2phy 4064(%ecx), %xmm6
|
||||
|
||||
// CHECK: vcvtuqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0xff,0xbf,0x7a,0x72,0x80]
|
||||
vcvtuqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtuw2ph %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7d,0xf5]
|
||||
vcvtuw2ph %xmm5, %xmm6
|
||||
|
||||
// CHECK: vcvtuw2ph %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7d,0xf5]
|
||||
vcvtuw2ph %ymm5, %ymm6
|
||||
|
||||
// CHECK: vcvtuw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vcvtuw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
|
||||
|
||||
// CHECK: vcvtuw2ph (%ecx){1to8}, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7d,0x31]
|
||||
vcvtuw2ph (%ecx){1to8}, %xmm6
|
||||
|
||||
// CHECK: vcvtuw2ph 2032(%ecx), %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7d,0x71,0x7f]
|
||||
vcvtuw2ph 2032(%ecx), %xmm6
|
||||
|
||||
// CHECK: vcvtuw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x9f,0x7d,0x72,0x80]
|
||||
vcvtuw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtuw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vcvtuw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
|
||||
|
||||
// CHECK: vcvtuw2ph (%ecx){1to16}, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x38,0x7d,0x31]
|
||||
vcvtuw2ph (%ecx){1to16}, %ymm6
|
||||
|
||||
// CHECK: vcvtuw2ph 4064(%ecx), %ymm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7d,0x71,0x7f]
|
||||
vcvtuw2ph 4064(%ecx), %ymm6
|
||||
|
||||
// CHECK: vcvtuw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7f,0xbf,0x7d,0x72,0x80]
|
||||
vcvtuw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtw2ph %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7d,0xf5]
|
||||
vcvtw2ph %xmm5, %xmm6
|
||||
|
||||
// CHECK: vcvtw2ph %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x7d,0xf5]
|
||||
vcvtw2ph %ymm5, %ymm6
|
||||
|
||||
// CHECK: vcvtw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vcvtw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
|
||||
|
||||
// CHECK: vcvtw2ph (%ecx){1to8}, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x7d,0x31]
|
||||
vcvtw2ph (%ecx){1to8}, %xmm6
|
||||
|
||||
// CHECK: vcvtw2ph 2032(%ecx), %xmm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7d,0x71,0x7f]
|
||||
vcvtw2ph 2032(%ecx), %xmm6
|
||||
|
||||
// CHECK: vcvtw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x9f,0x7d,0x72,0x80]
|
||||
vcvtw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vcvtw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vcvtw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
|
||||
|
||||
// CHECK: vcvtw2ph (%ecx){1to16}, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x38,0x7d,0x31]
|
||||
vcvtw2ph (%ecx){1to16}, %ymm6
|
||||
|
||||
// CHECK: vcvtw2ph 4064(%ecx), %ymm6
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x7d,0x71,0x7f]
|
||||
vcvtw2ph 4064(%ecx), %ymm6
|
||||
|
||||
// CHECK: vcvtw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7e,0xbf,0x7d,0x72,0x80]
|
||||
vcvtw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
|
|
|
@ -459,3 +459,771 @@
|
|||
// CHECK: vucomish xmm6, word ptr [edx - 256]
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x2e,0x72,0x80]
vucomish xmm6, word ptr [edx - 256]

// CHECK: vcvtdq2ph ymm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5b,0xf5]
vcvtdq2ph ymm6, zmm5

// CHECK: vcvtdq2ph ymm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5b,0xf5]
vcvtdq2ph ymm6, zmm5, {rn-sae}

// CHECK: vcvtdq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtdq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtdq2ph ymm6, dword ptr [ecx]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x5b,0x31]
vcvtdq2ph ymm6, dword ptr [ecx]{1to16}

// CHECK: vcvtdq2ph ymm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5b,0x71,0x7f]
vcvtdq2ph ymm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtdq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x5b,0x72,0x80]
vcvtdq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}

// CHECK: vcvtpd2ph xmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x5a,0xf5]
vcvtpd2ph xmm6, zmm5

// CHECK: vcvtpd2ph xmm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x5a,0xf5]
vcvtpd2ph xmm6, zmm5, {rn-sae}

// CHECK: vcvtpd2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtpd2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtpd2ph xmm6, qword ptr [ecx]{1to8}
// CHECK: encoding: [0x62,0xf5,0xfd,0x58,0x5a,0x31]
vcvtpd2ph xmm6, qword ptr [ecx]{1to8}

// CHECK: vcvtpd2ph xmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x5a,0x71,0x7f]
vcvtpd2ph xmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
// CHECK: encoding: [0x62,0xf5,0xfd,0xdf,0x5a,0x72,0x80]
vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}

// CHECK: vcvtph2dq zmm6, ymm5
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x5b,0xf5]
vcvtph2dq zmm6, ymm5

// CHECK: vcvtph2dq zmm6, ymm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x5b,0xf5]
vcvtph2dq zmm6, ymm5, {rn-sae}

// CHECK: vcvtph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtph2dq zmm6, word ptr [ecx]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x5b,0x31]
vcvtph2dq zmm6, word ptr [ecx]{1to16}

// CHECK: vcvtph2dq zmm6, ymmword ptr [ecx + 4064]
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x5b,0x71,0x7f]
vcvtph2dq zmm6, ymmword ptr [ecx + 4064]

// CHECK: vcvtph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x5b,0x72,0x80]
vcvtph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}

// CHECK: vcvtph2pd zmm6, xmm5
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5a,0xf5]
vcvtph2pd zmm6, xmm5

// CHECK: vcvtph2pd zmm6, xmm5, {sae}
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5a,0xf5]
vcvtph2pd zmm6, xmm5, {sae}

// CHECK: vcvtph2pd zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2pd zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtph2pd zmm6, word ptr [ecx]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x5a,0x31]
vcvtph2pd zmm6, word ptr [ecx]{1to8}

// CHECK: vcvtph2pd zmm6, xmmword ptr [ecx + 2032]
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5a,0x71,0x7f]
vcvtph2pd zmm6, xmmword ptr [ecx + 2032]

// CHECK: vcvtph2pd zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x5a,0x72,0x80]
vcvtph2pd zmm6 {k7} {z}, word ptr [edx - 256]{1to8}

// CHECK: vcvtph2psx zmm6, ymm5
// CHECK: encoding: [0x62,0xf6,0x7d,0x48,0x13,0xf5]
vcvtph2psx zmm6, ymm5

// CHECK: vcvtph2psx zmm6, ymm5, {sae}
// CHECK: encoding: [0x62,0xf6,0x7d,0x18,0x13,0xf5]
vcvtph2psx zmm6, ymm5, {sae}

// CHECK: vcvtph2psx zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf6,0x7d,0x4f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2psx zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtph2psx zmm6, word ptr [ecx]{1to16}
// CHECK: encoding: [0x62,0xf6,0x7d,0x58,0x13,0x31]
vcvtph2psx zmm6, word ptr [ecx]{1to16}

// CHECK: vcvtph2psx zmm6, ymmword ptr [ecx + 4064]
// CHECK: encoding: [0x62,0xf6,0x7d,0x48,0x13,0x71,0x7f]
vcvtph2psx zmm6, ymmword ptr [ecx + 4064]

// CHECK: vcvtph2psx zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
// CHECK: encoding: [0x62,0xf6,0x7d,0xdf,0x13,0x72,0x80]
vcvtph2psx zmm6 {k7} {z}, word ptr [edx - 256]{1to16}

// CHECK: vcvtph2qq zmm6, xmm5
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7b,0xf5]
vcvtph2qq zmm6, xmm5

// CHECK: vcvtph2qq zmm6, xmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7b,0xf5]
vcvtph2qq zmm6, xmm5, {rn-sae}

// CHECK: vcvtph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtph2qq zmm6, word ptr [ecx]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7b,0x31]
vcvtph2qq zmm6, word ptr [ecx]{1to8}

// CHECK: vcvtph2qq zmm6, xmmword ptr [ecx + 2032]
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7b,0x71,0x7f]
vcvtph2qq zmm6, xmmword ptr [ecx + 2032]

// CHECK: vcvtph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7b,0x72,0x80]
vcvtph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}

// CHECK: vcvtph2udq zmm6, ymm5
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x79,0xf5]
vcvtph2udq zmm6, ymm5

// CHECK: vcvtph2udq zmm6, ymm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x79,0xf5]
vcvtph2udq zmm6, ymm5, {rn-sae}

// CHECK: vcvtph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtph2udq zmm6, word ptr [ecx]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x79,0x31]
vcvtph2udq zmm6, word ptr [ecx]{1to16}

// CHECK: vcvtph2udq zmm6, ymmword ptr [ecx + 4064]
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x79,0x71,0x7f]
vcvtph2udq zmm6, ymmword ptr [ecx + 4064]

// CHECK: vcvtph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x79,0x72,0x80]
vcvtph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}

// CHECK: vcvtph2uqq zmm6, xmm5
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x79,0xf5]
vcvtph2uqq zmm6, xmm5

// CHECK: vcvtph2uqq zmm6, xmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x79,0xf5]
vcvtph2uqq zmm6, xmm5, {rn-sae}

// CHECK: vcvtph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtph2uqq zmm6, word ptr [ecx]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x79,0x31]
vcvtph2uqq zmm6, word ptr [ecx]{1to8}

// CHECK: vcvtph2uqq zmm6, xmmword ptr [ecx + 2032]
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x79,0x71,0x7f]
vcvtph2uqq zmm6, xmmword ptr [ecx + 2032]

// CHECK: vcvtph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x79,0x72,0x80]
vcvtph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}

// CHECK: vcvtph2uw zmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7d,0xf5]
vcvtph2uw zmm6, zmm5

// CHECK: vcvtph2uw zmm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7d,0xf5]
vcvtph2uw zmm6, zmm5, {rn-sae}

// CHECK: vcvtph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtph2uw zmm6, word ptr [ecx]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x7d,0x31]
vcvtph2uw zmm6, word ptr [ecx]{1to32}

// CHECK: vcvtph2uw zmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7d,0x71,0x7f]
vcvtph2uw zmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x7d,0x72,0x80]
vcvtph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}

// CHECK: vcvtph2w zmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7d,0xf5]
vcvtph2w zmm6, zmm5

// CHECK: vcvtph2w zmm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7d,0xf5]
vcvtph2w zmm6, zmm5, {rn-sae}

// CHECK: vcvtph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtph2w zmm6, word ptr [ecx]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7d,0x31]
vcvtph2w zmm6, word ptr [ecx]{1to32}

// CHECK: vcvtph2w zmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7d,0x71,0x7f]
vcvtph2w zmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7d,0x72,0x80]
vcvtph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}

// CHECK: vcvtps2phx ymm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x1d,0xf5]
vcvtps2phx ymm6, zmm5

// CHECK: vcvtps2phx ymm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x1d,0xf5]
vcvtps2phx ymm6, zmm5, {rn-sae}

// CHECK: vcvtps2phx ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtps2phx ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtps2phx ymm6, dword ptr [ecx]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x1d,0x31]
vcvtps2phx ymm6, dword ptr [ecx]{1to16}

// CHECK: vcvtps2phx ymm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x1d,0x71,0x7f]
vcvtps2phx ymm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtps2phx ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x1d,0x72,0x80]
vcvtps2phx ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}

// CHECK: vcvtqq2ph xmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x5b,0xf5]
vcvtqq2ph xmm6, zmm5

// CHECK: vcvtqq2ph xmm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x5b,0xf5]
vcvtqq2ph xmm6, zmm5, {rn-sae}

// CHECK: vcvtqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtqq2ph xmm6, qword ptr [ecx]{1to8}
// CHECK: encoding: [0x62,0xf5,0xfc,0x58,0x5b,0x31]
vcvtqq2ph xmm6, qword ptr [ecx]{1to8}

// CHECK: vcvtqq2ph xmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x5b,0x71,0x7f]
vcvtqq2ph xmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
// CHECK: encoding: [0x62,0xf5,0xfc,0xdf,0x5b,0x72,0x80]
vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}

// CHECK: vcvtsd2sh xmm6, xmm5, xmm4
// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0xf4]
vcvtsd2sh xmm6, xmm5, xmm4

// CHECK: vcvtsd2sh xmm6, xmm5, xmm4, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0xd7,0x18,0x5a,0xf4]
vcvtsd2sh xmm6, xmm5, xmm4, {rn-sae}

// CHECK: vcvtsd2sh xmm6 {k7}, xmm5, qword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0xd7,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtsd2sh xmm6 {k7}, xmm5, qword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtsd2sh xmm6, xmm5, qword ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0x31]
vcvtsd2sh xmm6, xmm5, qword ptr [ecx]

// CHECK: vcvtsd2sh xmm6, xmm5, qword ptr [ecx + 1016]
// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0x71,0x7f]
vcvtsd2sh xmm6, xmm5, qword ptr [ecx + 1016]

// CHECK: vcvtsd2sh xmm6 {k7} {z}, xmm5, qword ptr [edx - 1024]
// CHECK: encoding: [0x62,0xf5,0xd7,0x8f,0x5a,0x72,0x80]
vcvtsd2sh xmm6 {k7} {z}, xmm5, qword ptr [edx - 1024]

// CHECK: vcvtsh2sd xmm6, xmm5, xmm4
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0xf4]
vcvtsh2sd xmm6, xmm5, xmm4

// CHECK: vcvtsh2sd xmm6, xmm5, xmm4, {sae}
// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x5a,0xf4]
vcvtsh2sd xmm6, xmm5, xmm4, {sae}

// CHECK: vcvtsh2sd xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x56,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtsh2sd xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]

// CHECK: vcvtsh2sd xmm6, xmm5, word ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0x31]
vcvtsh2sd xmm6, xmm5, word ptr [ecx]

// CHECK: vcvtsh2sd xmm6, xmm5, word ptr [ecx + 254]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0x71,0x7f]
vcvtsh2sd xmm6, xmm5, word ptr [ecx + 254]

// CHECK: vcvtsh2sd xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
// CHECK: encoding: [0x62,0xf5,0x56,0x8f,0x5a,0x72,0x80]
vcvtsh2sd xmm6 {k7} {z}, xmm5, word ptr [edx - 256]

// CHECK: vcvtsh2si edx, xmm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0xd6]
vcvtsh2si edx, xmm6

// CHECK: vcvtsh2si edx, xmm6, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2d,0xd6]
vcvtsh2si edx, xmm6, {rn-sae}

// CHECK: vcvtsh2si edx, word ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x94,0xf4,0x00,0x00,0x00,0x10]
vcvtsh2si edx, word ptr [esp + 8*esi + 268435456]

// CHECK: vcvtsh2si edx, word ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x11]
vcvtsh2si edx, word ptr [ecx]

// CHECK: vcvtsh2si edx, word ptr [ecx + 254]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f]
vcvtsh2si edx, word ptr [ecx + 254]

// CHECK: vcvtsh2si edx, word ptr [edx - 256]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80]
vcvtsh2si edx, word ptr [edx - 256]

// CHECK: vcvtsh2ss xmm6, xmm5, xmm4
// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0xf4]
vcvtsh2ss xmm6, xmm5, xmm4

// CHECK: vcvtsh2ss xmm6, xmm5, xmm4, {sae}
// CHECK: encoding: [0x62,0xf6,0x54,0x18,0x13,0xf4]
vcvtsh2ss xmm6, xmm5, xmm4, {sae}

// CHECK: vcvtsh2ss xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf6,0x54,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtsh2ss xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]

// CHECK: vcvtsh2ss xmm6, xmm5, word ptr [ecx]
// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0x31]
vcvtsh2ss xmm6, xmm5, word ptr [ecx]

// CHECK: vcvtsh2ss xmm6, xmm5, word ptr [ecx + 254]
// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0x71,0x7f]
vcvtsh2ss xmm6, xmm5, word ptr [ecx + 254]

// CHECK: vcvtsh2ss xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
// CHECK: encoding: [0x62,0xf6,0x54,0x8f,0x13,0x72,0x80]
vcvtsh2ss xmm6 {k7} {z}, xmm5, word ptr [edx - 256]

// CHECK: vcvtsh2usi edx, xmm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0xd6]
vcvtsh2usi edx, xmm6

// CHECK: vcvtsh2usi edx, xmm6, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x79,0xd6]
vcvtsh2usi edx, xmm6, {rn-sae}

// CHECK: vcvtsh2usi edx, word ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x94,0xf4,0x00,0x00,0x00,0x10]
vcvtsh2usi edx, word ptr [esp + 8*esi + 268435456]

// CHECK: vcvtsh2usi edx, word ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x11]
vcvtsh2usi edx, word ptr [ecx]

// CHECK: vcvtsh2usi edx, word ptr [ecx + 254]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f]
vcvtsh2usi edx, word ptr [ecx + 254]

// CHECK: vcvtsh2usi edx, word ptr [edx - 256]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x52,0x80]
vcvtsh2usi edx, word ptr [edx - 256]

// CHECK: vcvtsi2sh xmm6, xmm5, edx
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0xf2]
vcvtsi2sh xmm6, xmm5, edx

// CHECK: vcvtsi2sh xmm6, xmm5, {rn-sae}, edx
// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x2a,0xf2]
vcvtsi2sh xmm6, xmm5, {rn-sae}, edx

// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtsi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x31]
vcvtsi2sh xmm6, xmm5, dword ptr [ecx]

// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [ecx + 508]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x71,0x7f]
vcvtsi2sh xmm6, xmm5, dword ptr [ecx + 508]

// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [edx - 512]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x72,0x80]
vcvtsi2sh xmm6, xmm5, dword ptr [edx - 512]

// CHECK: vcvtss2sh xmm6, xmm5, xmm4
// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0xf4]
vcvtss2sh xmm6, xmm5, xmm4

// CHECK: vcvtss2sh xmm6, xmm5, xmm4, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x54,0x18,0x1d,0xf4]
vcvtss2sh xmm6, xmm5, xmm4, {rn-sae}

// CHECK: vcvtss2sh xmm6 {k7}, xmm5, dword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x54,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtss2sh xmm6 {k7}, xmm5, dword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtss2sh xmm6, xmm5, dword ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0x31]
vcvtss2sh xmm6, xmm5, dword ptr [ecx]

// CHECK: vcvtss2sh xmm6, xmm5, dword ptr [ecx + 508]
// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0x71,0x7f]
vcvtss2sh xmm6, xmm5, dword ptr [ecx + 508]

// CHECK: vcvtss2sh xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]
// CHECK: encoding: [0x62,0xf5,0x54,0x8f,0x1d,0x72,0x80]
vcvtss2sh xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]

// CHECK: vcvttph2dq zmm6, ymm5
// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x5b,0xf5]
vcvttph2dq zmm6, ymm5

// CHECK: vcvttph2dq zmm6, ymm5, {sae}
// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x5b,0xf5]
vcvttph2dq zmm6, ymm5, {sae}

// CHECK: vcvttph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7e,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvttph2dq zmm6, word ptr [ecx]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7e,0x58,0x5b,0x31]
vcvttph2dq zmm6, word ptr [ecx]{1to16}

// CHECK: vcvttph2dq zmm6, ymmword ptr [ecx + 4064]
// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x5b,0x71,0x7f]
vcvttph2dq zmm6, ymmword ptr [ecx + 4064]

// CHECK: vcvttph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7e,0xdf,0x5b,0x72,0x80]
vcvttph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}

// CHECK: vcvttph2qq zmm6, xmm5
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7a,0xf5]
vcvttph2qq zmm6, xmm5

// CHECK: vcvttph2qq zmm6, xmm5, {sae}
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7a,0xf5]
vcvttph2qq zmm6, xmm5, {sae}

// CHECK: vcvttph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvttph2qq zmm6, word ptr [ecx]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7a,0x31]
vcvttph2qq zmm6, word ptr [ecx]{1to8}

// CHECK: vcvttph2qq zmm6, xmmword ptr [ecx + 2032]
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7a,0x71,0x7f]
vcvttph2qq zmm6, xmmword ptr [ecx + 2032]

// CHECK: vcvttph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7a,0x72,0x80]
vcvttph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}

// CHECK: vcvttph2udq zmm6, ymm5
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x78,0xf5]
vcvttph2udq zmm6, ymm5

// CHECK: vcvttph2udq zmm6, ymm5, {sae}
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x78,0xf5]
vcvttph2udq zmm6, ymm5, {sae}

// CHECK: vcvttph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvttph2udq zmm6, word ptr [ecx]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x78,0x31]
vcvttph2udq zmm6, word ptr [ecx]{1to16}

// CHECK: vcvttph2udq zmm6, ymmword ptr [ecx + 4064]
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x78,0x71,0x7f]
vcvttph2udq zmm6, ymmword ptr [ecx + 4064]

// CHECK: vcvttph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x78,0x72,0x80]
vcvttph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}

// CHECK: vcvttph2uqq zmm6, xmm5
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x78,0xf5]
vcvttph2uqq zmm6, xmm5

// CHECK: vcvttph2uqq zmm6, xmm5, {sae}
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x78,0xf5]
vcvttph2uqq zmm6, xmm5, {sae}

// CHECK: vcvttph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvttph2uqq zmm6, word ptr [ecx]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x78,0x31]
vcvttph2uqq zmm6, word ptr [ecx]{1to8}

// CHECK: vcvttph2uqq zmm6, xmmword ptr [ecx + 2032]
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x78,0x71,0x7f]
vcvttph2uqq zmm6, xmmword ptr [ecx + 2032]

// CHECK: vcvttph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x78,0x72,0x80]
vcvttph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}

// CHECK: vcvttph2uw zmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7c,0xf5]
vcvttph2uw zmm6, zmm5

// CHECK: vcvttph2uw zmm6, zmm5, {sae}
// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7c,0xf5]
vcvttph2uw zmm6, zmm5, {sae}

// CHECK: vcvttph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvttph2uw zmm6, word ptr [ecx]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x7c,0x31]
vcvttph2uw zmm6, word ptr [ecx]{1to32}

// CHECK: vcvttph2uw zmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7c,0x71,0x7f]
vcvttph2uw zmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvttph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x7c,0x72,0x80]
vcvttph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}

// CHECK: vcvttph2w zmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7c,0xf5]
vcvttph2w zmm6, zmm5

// CHECK: vcvttph2w zmm6, zmm5, {sae}
// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7c,0xf5]
vcvttph2w zmm6, zmm5, {sae}

// CHECK: vcvttph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvttph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvttph2w zmm6, word ptr [ecx]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7c,0x31]
vcvttph2w zmm6, word ptr [ecx]{1to32}

// CHECK: vcvttph2w zmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7c,0x71,0x7f]
vcvttph2w zmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvttph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7c,0x72,0x80]
vcvttph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}

// CHECK: vcvttsh2si edx, xmm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0xd6]
vcvttsh2si edx, xmm6

// CHECK: vcvttsh2si edx, xmm6, {sae}
// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2c,0xd6]
vcvttsh2si edx, xmm6, {sae}

// CHECK: vcvttsh2si edx, word ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
vcvttsh2si edx, word ptr [esp + 8*esi + 268435456]

// CHECK: vcvttsh2si edx, word ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x11]
vcvttsh2si edx, word ptr [ecx]

// CHECK: vcvttsh2si edx, word ptr [ecx + 254]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f]
vcvttsh2si edx, word ptr [ecx + 254]

// CHECK: vcvttsh2si edx, word ptr [edx - 256]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80]
vcvttsh2si edx, word ptr [edx - 256]

// CHECK: vcvttsh2usi edx, xmm6
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0xd6]
vcvttsh2usi edx, xmm6

// CHECK: vcvttsh2usi edx, xmm6, {sae}
// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x78,0xd6]
vcvttsh2usi edx, xmm6, {sae}

// CHECK: vcvttsh2usi edx, word ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x94,0xf4,0x00,0x00,0x00,0x10]
vcvttsh2usi edx, word ptr [esp + 8*esi + 268435456]

// CHECK: vcvttsh2usi edx, word ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x11]
vcvttsh2usi edx, word ptr [ecx]

// CHECK: vcvttsh2usi edx, word ptr [ecx + 254]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f]
vcvttsh2usi edx, word ptr [ecx + 254]

// CHECK: vcvttsh2usi edx, word ptr [edx - 256]
// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x52,0x80]
vcvttsh2usi edx, word ptr [edx - 256]

// CHECK: vcvtudq2ph ymm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7a,0xf5]
vcvtudq2ph ymm6, zmm5

// CHECK: vcvtudq2ph ymm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7a,0xf5]
vcvtudq2ph ymm6, zmm5, {rn-sae}

// CHECK: vcvtudq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7f,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtudq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtudq2ph ymm6, dword ptr [ecx]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7f,0x58,0x7a,0x31]
vcvtudq2ph ymm6, dword ptr [ecx]{1to16}

// CHECK: vcvtudq2ph ymm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7a,0x71,0x7f]
vcvtudq2ph ymm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtudq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
// CHECK: encoding: [0x62,0xf5,0x7f,0xdf,0x7a,0x72,0x80]
vcvtudq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}

// CHECK: vcvtuqq2ph xmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0xff,0x48,0x7a,0xf5]
vcvtuqq2ph xmm6, zmm5

// CHECK: vcvtuqq2ph xmm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0xff,0x18,0x7a,0xf5]
vcvtuqq2ph xmm6, zmm5, {rn-sae}

// CHECK: vcvtuqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0xff,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtuqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtuqq2ph xmm6, qword ptr [ecx]{1to8}
// CHECK: encoding: [0x62,0xf5,0xff,0x58,0x7a,0x31]
vcvtuqq2ph xmm6, qword ptr [ecx]{1to8}

// CHECK: vcvtuqq2ph xmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0xff,0x48,0x7a,0x71,0x7f]
vcvtuqq2ph xmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
// CHECK: encoding: [0x62,0xf5,0xff,0xdf,0x7a,0x72,0x80]
vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}

// CHECK: vcvtusi2sh xmm6, xmm5, edx
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0xf2]
vcvtusi2sh xmm6, xmm5, edx

// CHECK: vcvtusi2sh xmm6, xmm5, {rn-sae}, edx
// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x7b,0xf2]
vcvtusi2sh xmm6, xmm5, {rn-sae}, edx

// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtusi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [ecx]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x31]
vcvtusi2sh xmm6, xmm5, dword ptr [ecx]

// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [ecx + 508]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x71,0x7f]
vcvtusi2sh xmm6, xmm5, dword ptr [ecx + 508]

// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [edx - 512]
// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x72,0x80]
vcvtusi2sh xmm6, xmm5, dword ptr [edx - 512]

// CHECK: vcvtuw2ph zmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7d,0xf5]
vcvtuw2ph zmm6, zmm5

// CHECK: vcvtuw2ph zmm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7d,0xf5]
vcvtuw2ph zmm6, zmm5, {rn-sae}

// CHECK: vcvtuw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7f,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtuw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtuw2ph zmm6, word ptr [ecx]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7f,0x58,0x7d,0x31]
vcvtuw2ph zmm6, word ptr [ecx]{1to32}

// CHECK: vcvtuw2ph zmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7d,0x71,0x7f]
vcvtuw2ph zmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtuw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7f,0xdf,0x7d,0x72,0x80]
vcvtuw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}

// CHECK: vcvtw2ph zmm6, zmm5
// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x7d,0xf5]
vcvtw2ph zmm6, zmm5

// CHECK: vcvtw2ph zmm6, zmm5, {rn-sae}
// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x7d,0xf5]
vcvtw2ph zmm6, zmm5, {rn-sae}

// CHECK: vcvtw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0x62,0xf5,0x7e,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
vcvtw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]

// CHECK: vcvtw2ph zmm6, word ptr [ecx]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7e,0x58,0x7d,0x31]
vcvtw2ph zmm6, word ptr [ecx]{1to32}

// CHECK: vcvtw2ph zmm6, zmmword ptr [ecx + 8128]
// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x7d,0x71,0x7f]
vcvtw2ph zmm6, zmmword ptr [ecx + 8128]

// CHECK: vcvtw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
// CHECK: encoding: [0x62,0xf5,0x7e,0xdf,0x7d,0x72,0x80]
vcvtw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}

@@ -279,3 +279,859 @@
// CHECK: vsubph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x65,0x14,0x97,0x5c,0x72,0x80]
|
||||
vsubph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x5b,0xf5]
|
||||
vcvtdq2ph xmm30, xmm29
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30, ymm29
|
||||
// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x5b,0xf5]
|
||||
vcvtdq2ph xmm30, ymm29
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtdq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30, dword ptr [r9]{1to4}
|
||||
// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x5b,0x31]
|
||||
vcvtdq2ph xmm30, dword ptr [r9]{1to4}
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x5b,0x71,0x7f]
|
||||
vcvtdq2ph xmm30, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x5b,0x72,0x80]
|
||||
vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30, dword ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x5b,0x31]
|
||||
vcvtdq2ph xmm30, dword ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x5b,0x71,0x7f]
|
||||
vcvtdq2ph xmm30, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x5b,0x72,0x80]
|
||||
vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x05,0xfd,0x08,0x5a,0xf5]
|
||||
vcvtpd2ph xmm30, xmm29
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30, ymm29
|
||||
// CHECK: encoding: [0x62,0x05,0xfd,0x28,0x5a,0xf5]
|
||||
vcvtpd2ph xmm30, ymm29
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x25,0xfd,0x0f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtpd2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30, qword ptr [r9]{1to2}
|
||||
// CHECK: encoding: [0x62,0x45,0xfd,0x18,0x5a,0x31]
|
||||
vcvtpd2ph xmm30, qword ptr [r9]{1to2}
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x65,0xfd,0x08,0x5a,0x71,0x7f]
|
||||
vcvtpd2ph xmm30, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
|
||||
// CHECK: encoding: [0x62,0x65,0xfd,0x9f,0x5a,0x72,0x80]
|
||||
vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30, qword ptr [r9]{1to4}
|
||||
// CHECK: encoding: [0x62,0x45,0xfd,0x38,0x5a,0x31]
|
||||
vcvtpd2ph xmm30, qword ptr [r9]{1to4}
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x65,0xfd,0x28,0x5a,0x71,0x7f]
|
||||
vcvtpd2ph xmm30, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
|
||||
// CHECK: encoding: [0x62,0x65,0xfd,0xbf,0x5a,0x72,0x80]
|
||||
vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
|
||||
|
||||
// CHECK: vcvtph2dq xmm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x5b,0xf5]
|
||||
vcvtph2dq xmm30, xmm29
|
||||
|
||||
// CHECK: vcvtph2dq ymm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x5b,0xf5]
|
||||
vcvtph2dq ymm30, xmm29
|
||||
|
||||
// CHECK: vcvtph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtph2dq xmm30, word ptr [r9]{1to4}
|
||||
// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x5b,0x31]
|
||||
vcvtph2dq xmm30, word ptr [r9]{1to4}
|
||||
|
||||
// CHECK: vcvtph2dq xmm30, qword ptr [rcx + 1016]
|
||||
// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x5b,0x71,0x7f]
|
||||
vcvtph2dq xmm30, qword ptr [rcx + 1016]
|
||||
|
||||
// CHECK: vcvtph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
|
||||
// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x5b,0x72,0x80]
|
||||
vcvtph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
|
||||
|
||||
// CHECK: vcvtph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtph2dq ymm30, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x5b,0x31]
|
||||
vcvtph2dq ymm30, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vcvtph2dq ymm30, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x5b,0x71,0x7f]
|
||||
vcvtph2dq ymm30, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vcvtph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x5b,0x72,0x80]
|
||||
vcvtph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vcvtph2pd xmm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x5a,0xf5]
|
||||
vcvtph2pd xmm30, xmm29
|
||||
|
||||
// CHECK: vcvtph2pd ymm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x5a,0xf5]
|
||||
vcvtph2pd ymm30, xmm29
|
||||
|
||||
// CHECK: vcvtph2pd xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtph2pd xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtph2pd xmm30, word ptr [r9]{1to2}
|
||||
// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x5a,0x31]
|
||||
vcvtph2pd xmm30, word ptr [r9]{1to2}
|
||||
|
||||
// CHECK: vcvtph2pd xmm30, dword ptr [rcx + 508]
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x5a,0x71,0x7f]
|
||||
vcvtph2pd xmm30, dword ptr [rcx + 508]
|
||||
|
||||
// CHECK: vcvtph2pd xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x5a,0x72,0x80]
|
||||
vcvtph2pd xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
|
||||
|
||||
// CHECK: vcvtph2pd ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtph2pd ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtph2pd ymm30, word ptr [r9]{1to4}
|
||||
// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x5a,0x31]
|
||||
vcvtph2pd ymm30, word ptr [r9]{1to4}
|
||||
|
||||
// CHECK: vcvtph2pd ymm30, qword ptr [rcx + 1016]
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x5a,0x71,0x7f]
|
||||
vcvtph2pd ymm30, qword ptr [rcx + 1016]
|
||||
|
||||
// CHECK: vcvtph2pd ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x5a,0x72,0x80]
|
||||
vcvtph2pd ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
|
||||
|
||||
// CHECK: vcvtph2psx xmm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x06,0x7d,0x08,0x13,0xf5]
|
||||
vcvtph2psx xmm30, xmm29
|
||||
|
||||
// CHECK: vcvtph2psx ymm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x06,0x7d,0x28,0x13,0xf5]
|
||||
vcvtph2psx ymm30, xmm29
|
||||
|
||||
// CHECK: vcvtph2psx xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x7d,0x0f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtph2psx xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtph2psx xmm30, word ptr [r9]{1to4}
|
||||
// CHECK: encoding: [0x62,0x46,0x7d,0x18,0x13,0x31]
|
||||
vcvtph2psx xmm30, word ptr [r9]{1to4}
|
||||
|
||||
// CHECK: vcvtph2psx xmm30, qword ptr [rcx + 1016]
|
||||
// CHECK: encoding: [0x62,0x66,0x7d,0x08,0x13,0x71,0x7f]
|
||||
vcvtph2psx xmm30, qword ptr [rcx + 1016]
|
||||
|
||||
// CHECK: vcvtph2psx xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
|
||||
// CHECK: encoding: [0x62,0x66,0x7d,0x9f,0x13,0x72,0x80]
|
||||
vcvtph2psx xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
|
||||
|
||||
// CHECK: vcvtph2psx ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x7d,0x2f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtph2psx ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtph2psx ymm30, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x7d,0x38,0x13,0x31]
|
||||
vcvtph2psx ymm30, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vcvtph2psx ymm30, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x7d,0x28,0x13,0x71,0x7f]
|
||||
vcvtph2psx ymm30, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vcvtph2psx ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x7d,0xbf,0x13,0x72,0x80]
|
||||
vcvtph2psx ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vcvtph2qq xmm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7b,0xf5]
|
||||
vcvtph2qq xmm30, xmm29
|
||||
|
||||
// CHECK: vcvtph2qq ymm30, xmm29
|
||||
// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7b,0xf5]
|
||||
vcvtph2qq ymm30, xmm29
|
||||
|
||||
// CHECK: vcvtph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtph2qq xmm30, word ptr [r9]{1to2}
|
||||
// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7b,0x31]
|
||||
vcvtph2qq xmm30, word ptr [r9]{1to2}
|
||||
|
||||
// CHECK: vcvtph2qq xmm30, dword ptr [rcx + 508]
|
||||
// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7b,0x71,0x7f]
|
||||
vcvtph2qq xmm30, dword ptr [rcx + 508]
|
||||
|
||||
// CHECK: vcvtph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
|
||||
// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7b,0x72,0x80]
|
||||
vcvtph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
|
||||
|
||||
// CHECK: vcvtph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vcvtph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vcvtph2qq ymm30, word ptr [r9]{1to4}
|
||||
// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7b,0x31]
|
||||
vcvtph2qq ymm30, word ptr [r9]{1to4}
|
||||
|
||||
// CHECK: vcvtph2qq ymm30, qword ptr [rcx + 1016]
|
||||
// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7b,0x71,0x7f]
|
||||
vcvtph2qq ymm30, qword ptr [rcx + 1016]
|
||||
|
||||
// CHECK: vcvtph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
|
||||
// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7b,0x72,0x80]
|
||||
vcvtph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
|
||||
|
||||
// CHECK: vcvtph2udq xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x79,0xf5]
vcvtph2udq xmm30, xmm29

// CHECK: vcvtph2udq ymm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x79,0xf5]
vcvtph2udq ymm30, xmm29

// CHECK: vcvtph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtph2udq xmm30, word ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x79,0x31]
vcvtph2udq xmm30, word ptr [r9]{1to4}

// CHECK: vcvtph2udq xmm30, qword ptr [rcx + 1016]
// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x79,0x71,0x7f]
vcvtph2udq xmm30, qword ptr [rcx + 1016]

// CHECK: vcvtph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x79,0x72,0x80]
vcvtph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}

// CHECK: vcvtph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtph2udq ymm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x79,0x31]
vcvtph2udq ymm30, word ptr [r9]{1to8}

// CHECK: vcvtph2udq ymm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x79,0x71,0x7f]
vcvtph2udq ymm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x79,0x72,0x80]
vcvtph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvtph2uqq xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x79,0xf5]
vcvtph2uqq xmm30, xmm29

// CHECK: vcvtph2uqq ymm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x79,0xf5]
vcvtph2uqq ymm30, xmm29

// CHECK: vcvtph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtph2uqq xmm30, word ptr [r9]{1to2}
// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x79,0x31]
vcvtph2uqq xmm30, word ptr [r9]{1to2}

// CHECK: vcvtph2uqq xmm30, dword ptr [rcx + 508]
// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x79,0x71,0x7f]
vcvtph2uqq xmm30, dword ptr [rcx + 508]

// CHECK: vcvtph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x79,0x72,0x80]
vcvtph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}

// CHECK: vcvtph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtph2uqq ymm30, word ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x79,0x31]
vcvtph2uqq ymm30, word ptr [r9]{1to4}

// CHECK: vcvtph2uqq ymm30, qword ptr [rcx + 1016]
// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x79,0x71,0x7f]
vcvtph2uqq ymm30, qword ptr [rcx + 1016]

// CHECK: vcvtph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x79,0x72,0x80]
vcvtph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}

// CHECK: vcvtph2uw xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x7d,0xf5]
vcvtph2uw xmm30, xmm29

// CHECK: vcvtph2uw ymm30, ymm29
// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x7d,0xf5]
vcvtph2uw ymm30, ymm29

// CHECK: vcvtph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtph2uw xmm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x7d,0x31]
vcvtph2uw xmm30, word ptr [r9]{1to8}

// CHECK: vcvtph2uw xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x7d,0x71,0x7f]
vcvtph2uw xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x7d,0x72,0x80]
vcvtph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvtph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtph2uw ymm30, word ptr [r9]{1to16}
// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x7d,0x31]
vcvtph2uw ymm30, word ptr [r9]{1to16}

// CHECK: vcvtph2uw ymm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x7d,0x71,0x7f]
vcvtph2uw ymm30, ymmword ptr [rcx + 4064]

// CHECK: vcvtph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x7d,0x72,0x80]
vcvtph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}

// CHECK: vcvtph2w xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7d,0xf5]
vcvtph2w xmm30, xmm29

// CHECK: vcvtph2w ymm30, ymm29
// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7d,0xf5]
vcvtph2w ymm30, ymm29

// CHECK: vcvtph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtph2w xmm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7d,0x31]
vcvtph2w xmm30, word ptr [r9]{1to8}

// CHECK: vcvtph2w xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7d,0x71,0x7f]
vcvtph2w xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7d,0x72,0x80]
vcvtph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvtph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtph2w ymm30, word ptr [r9]{1to16}
// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7d,0x31]
vcvtph2w ymm30, word ptr [r9]{1to16}

// CHECK: vcvtph2w ymm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7d,0x71,0x7f]
vcvtph2w ymm30, ymmword ptr [rcx + 4064]

// CHECK: vcvtph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7d,0x72,0x80]
vcvtph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}

// CHECK: vcvtps2phx xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x1d,0xf5]
vcvtps2phx xmm30, xmm29

// CHECK: vcvtps2phx xmm30, ymm29
// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x1d,0xf5]
vcvtps2phx xmm30, ymm29

// CHECK: vcvtps2phx xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtps2phx xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtps2phx xmm30, dword ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x1d,0x31]
vcvtps2phx xmm30, dword ptr [r9]{1to4}

// CHECK: vcvtps2phx xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x1d,0x71,0x7f]
vcvtps2phx xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x1d,0x72,0x80]
vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}

// CHECK: vcvtps2phx xmm30, dword ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x1d,0x31]
vcvtps2phx xmm30, dword ptr [r9]{1to8}

// CHECK: vcvtps2phx xmm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x1d,0x71,0x7f]
vcvtps2phx xmm30, ymmword ptr [rcx + 4064]

// CHECK: vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x1d,0x72,0x80]
vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}

// CHECK: vcvtqq2ph xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0xfc,0x08,0x5b,0xf5]
vcvtqq2ph xmm30, xmm29

// CHECK: vcvtqq2ph xmm30, ymm29
// CHECK: encoding: [0x62,0x05,0xfc,0x28,0x5b,0xf5]
vcvtqq2ph xmm30, ymm29

// CHECK: vcvtqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0xfc,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtqq2ph xmm30, qword ptr [r9]{1to2}
// CHECK: encoding: [0x62,0x45,0xfc,0x18,0x5b,0x31]
vcvtqq2ph xmm30, qword ptr [r9]{1to2}

// CHECK: vcvtqq2ph xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0xfc,0x08,0x5b,0x71,0x7f]
vcvtqq2ph xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
// CHECK: encoding: [0x62,0x65,0xfc,0x9f,0x5b,0x72,0x80]
vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}

// CHECK: vcvtqq2ph xmm30, qword ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0xfc,0x38,0x5b,0x31]
vcvtqq2ph xmm30, qword ptr [r9]{1to4}

// CHECK: vcvtqq2ph xmm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0xfc,0x28,0x5b,0x71,0x7f]
vcvtqq2ph xmm30, ymmword ptr [rcx + 4064]

// CHECK: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
// CHECK: encoding: [0x62,0x65,0xfc,0xbf,0x5b,0x72,0x80]
vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}

// CHECK: vcvttph2dq xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7e,0x08,0x5b,0xf5]
vcvttph2dq xmm30, xmm29

// CHECK: vcvttph2dq ymm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7e,0x28,0x5b,0xf5]
vcvttph2dq ymm30, xmm29

// CHECK: vcvttph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7e,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2dq xmm30, word ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0x7e,0x18,0x5b,0x31]
vcvttph2dq xmm30, word ptr [r9]{1to4}

// CHECK: vcvttph2dq xmm30, qword ptr [rcx + 1016]
// CHECK: encoding: [0x62,0x65,0x7e,0x08,0x5b,0x71,0x7f]
vcvttph2dq xmm30, qword ptr [rcx + 1016]

// CHECK: vcvttph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
// CHECK: encoding: [0x62,0x65,0x7e,0x9f,0x5b,0x72,0x80]
vcvttph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}

// CHECK: vcvttph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7e,0x2f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2dq ymm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7e,0x38,0x5b,0x31]
vcvttph2dq ymm30, word ptr [r9]{1to8}

// CHECK: vcvttph2dq ymm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7e,0x28,0x5b,0x71,0x7f]
vcvttph2dq ymm30, xmmword ptr [rcx + 2032]

// CHECK: vcvttph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7e,0xbf,0x5b,0x72,0x80]
vcvttph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvttph2qq xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7a,0xf5]
vcvttph2qq xmm30, xmm29

// CHECK: vcvttph2qq ymm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7a,0xf5]
vcvttph2qq ymm30, xmm29

// CHECK: vcvttph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2qq xmm30, word ptr [r9]{1to2}
// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7a,0x31]
vcvttph2qq xmm30, word ptr [r9]{1to2}

// CHECK: vcvttph2qq xmm30, dword ptr [rcx + 508]
// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7a,0x71,0x7f]
vcvttph2qq xmm30, dword ptr [rcx + 508]

// CHECK: vcvttph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7a,0x72,0x80]
vcvttph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}

// CHECK: vcvttph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2qq ymm30, word ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7a,0x31]
vcvttph2qq ymm30, word ptr [r9]{1to4}

// CHECK: vcvttph2qq ymm30, qword ptr [rcx + 1016]
// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7a,0x71,0x7f]
vcvttph2qq ymm30, qword ptr [rcx + 1016]

// CHECK: vcvttph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7a,0x72,0x80]
vcvttph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}

// CHECK: vcvttph2udq xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x78,0xf5]
vcvttph2udq xmm30, xmm29

// CHECK: vcvttph2udq ymm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x78,0xf5]
vcvttph2udq ymm30, xmm29

// CHECK: vcvttph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2udq xmm30, word ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x78,0x31]
vcvttph2udq xmm30, word ptr [r9]{1to4}

// CHECK: vcvttph2udq xmm30, qword ptr [rcx + 1016]
// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x78,0x71,0x7f]
vcvttph2udq xmm30, qword ptr [rcx + 1016]

// CHECK: vcvttph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x78,0x72,0x80]
vcvttph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}

// CHECK: vcvttph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2udq ymm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x78,0x31]
vcvttph2udq ymm30, word ptr [r9]{1to8}

// CHECK: vcvttph2udq ymm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x78,0x71,0x7f]
vcvttph2udq ymm30, xmmword ptr [rcx + 2032]

// CHECK: vcvttph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x78,0x72,0x80]
vcvttph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvttph2uqq xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x78,0xf5]
vcvttph2uqq xmm30, xmm29

// CHECK: vcvttph2uqq ymm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x78,0xf5]
vcvttph2uqq ymm30, xmm29

// CHECK: vcvttph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2uqq xmm30, word ptr [r9]{1to2}
// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x78,0x31]
vcvttph2uqq xmm30, word ptr [r9]{1to2}

// CHECK: vcvttph2uqq xmm30, dword ptr [rcx + 508]
// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x78,0x71,0x7f]
vcvttph2uqq xmm30, dword ptr [rcx + 508]

// CHECK: vcvttph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x78,0x72,0x80]
vcvttph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}

// CHECK: vcvttph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2uqq ymm30, word ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x78,0x31]
vcvttph2uqq ymm30, word ptr [r9]{1to4}

// CHECK: vcvttph2uqq ymm30, qword ptr [rcx + 1016]
// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x78,0x71,0x7f]
vcvttph2uqq ymm30, qword ptr [rcx + 1016]

// CHECK: vcvttph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x78,0x72,0x80]
vcvttph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}

// CHECK: vcvttph2uw xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x7c,0xf5]
vcvttph2uw xmm30, xmm29

// CHECK: vcvttph2uw ymm30, ymm29
// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x7c,0xf5]
vcvttph2uw ymm30, ymm29

// CHECK: vcvttph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2uw xmm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x7c,0x31]
vcvttph2uw xmm30, word ptr [r9]{1to8}

// CHECK: vcvttph2uw xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x7c,0x71,0x7f]
vcvttph2uw xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvttph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x7c,0x72,0x80]
vcvttph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvttph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2uw ymm30, word ptr [r9]{1to16}
// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x7c,0x31]
vcvttph2uw ymm30, word ptr [r9]{1to16}

// CHECK: vcvttph2uw ymm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x7c,0x71,0x7f]
vcvttph2uw ymm30, ymmword ptr [rcx + 4064]

// CHECK: vcvttph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x7c,0x72,0x80]
vcvttph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}

// CHECK: vcvttph2w xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7c,0xf5]
vcvttph2w xmm30, xmm29

// CHECK: vcvttph2w ymm30, ymm29
// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7c,0xf5]
vcvttph2w ymm30, ymm29

// CHECK: vcvttph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2w xmm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7c,0x31]
vcvttph2w xmm30, word ptr [r9]{1to8}

// CHECK: vcvttph2w xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7c,0x71,0x7f]
vcvttph2w xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvttph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7c,0x72,0x80]
vcvttph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvttph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvttph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvttph2w ymm30, word ptr [r9]{1to16}
// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7c,0x31]
vcvttph2w ymm30, word ptr [r9]{1to16}

// CHECK: vcvttph2w ymm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7c,0x71,0x7f]
vcvttph2w ymm30, ymmword ptr [rcx + 4064]

// CHECK: vcvttph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7c,0x72,0x80]
vcvttph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}

// CHECK: vcvtudq2ph xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7f,0x08,0x7a,0xf5]
vcvtudq2ph xmm30, xmm29

// CHECK: vcvtudq2ph xmm30, ymm29
// CHECK: encoding: [0x62,0x05,0x7f,0x28,0x7a,0xf5]
vcvtudq2ph xmm30, ymm29

// CHECK: vcvtudq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7f,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtudq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtudq2ph xmm30, dword ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0x7f,0x18,0x7a,0x31]
vcvtudq2ph xmm30, dword ptr [r9]{1to4}

// CHECK: vcvtudq2ph xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7f,0x08,0x7a,0x71,0x7f]
vcvtudq2ph xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
// CHECK: encoding: [0x62,0x65,0x7f,0x9f,0x7a,0x72,0x80]
vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}

// CHECK: vcvtudq2ph xmm30, dword ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7f,0x38,0x7a,0x31]
vcvtudq2ph xmm30, dword ptr [r9]{1to8}

// CHECK: vcvtudq2ph xmm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0x7f,0x28,0x7a,0x71,0x7f]
vcvtudq2ph xmm30, ymmword ptr [rcx + 4064]

// CHECK: vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
// CHECK: encoding: [0x62,0x65,0x7f,0xbf,0x7a,0x72,0x80]
vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}

// CHECK: vcvtuqq2ph xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0xff,0x08,0x7a,0xf5]
vcvtuqq2ph xmm30, xmm29

// CHECK: vcvtuqq2ph xmm30, ymm29
// CHECK: encoding: [0x62,0x05,0xff,0x28,0x7a,0xf5]
vcvtuqq2ph xmm30, ymm29

// CHECK: vcvtuqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0xff,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtuqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtuqq2ph xmm30, qword ptr [r9]{1to2}
// CHECK: encoding: [0x62,0x45,0xff,0x18,0x7a,0x31]
vcvtuqq2ph xmm30, qword ptr [r9]{1to2}

// CHECK: vcvtuqq2ph xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0xff,0x08,0x7a,0x71,0x7f]
vcvtuqq2ph xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
// CHECK: encoding: [0x62,0x65,0xff,0x9f,0x7a,0x72,0x80]
vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}

// CHECK: vcvtuqq2ph xmm30, qword ptr [r9]{1to4}
// CHECK: encoding: [0x62,0x45,0xff,0x38,0x7a,0x31]
vcvtuqq2ph xmm30, qword ptr [r9]{1to4}

// CHECK: vcvtuqq2ph xmm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0xff,0x28,0x7a,0x71,0x7f]
vcvtuqq2ph xmm30, ymmword ptr [rcx + 4064]

// CHECK: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
// CHECK: encoding: [0x62,0x65,0xff,0xbf,0x7a,0x72,0x80]
vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}

// CHECK: vcvtuw2ph xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7f,0x08,0x7d,0xf5]
vcvtuw2ph xmm30, xmm29

// CHECK: vcvtuw2ph ymm30, ymm29
// CHECK: encoding: [0x62,0x05,0x7f,0x28,0x7d,0xf5]
vcvtuw2ph ymm30, ymm29

// CHECK: vcvtuw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7f,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtuw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtuw2ph xmm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7f,0x18,0x7d,0x31]
vcvtuw2ph xmm30, word ptr [r9]{1to8}

// CHECK: vcvtuw2ph xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7f,0x08,0x7d,0x71,0x7f]
vcvtuw2ph xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtuw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7f,0x9f,0x7d,0x72,0x80]
vcvtuw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvtuw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7f,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtuw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtuw2ph ymm30, word ptr [r9]{1to16}
// CHECK: encoding: [0x62,0x45,0x7f,0x38,0x7d,0x31]
vcvtuw2ph ymm30, word ptr [r9]{1to16}

// CHECK: vcvtuw2ph ymm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0x7f,0x28,0x7d,0x71,0x7f]
vcvtuw2ph ymm30, ymmword ptr [rcx + 4064]

// CHECK: vcvtuw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
// CHECK: encoding: [0x62,0x65,0x7f,0xbf,0x7d,0x72,0x80]
vcvtuw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}

// CHECK: vcvtw2ph xmm30, xmm29
// CHECK: encoding: [0x62,0x05,0x7e,0x08,0x7d,0xf5]
vcvtw2ph xmm30, xmm29

// CHECK: vcvtw2ph ymm30, ymm29
// CHECK: encoding: [0x62,0x05,0x7e,0x28,0x7d,0xf5]
vcvtw2ph ymm30, ymm29

// CHECK: vcvtw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7e,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtw2ph xmm30, word ptr [r9]{1to8}
// CHECK: encoding: [0x62,0x45,0x7e,0x18,0x7d,0x31]
vcvtw2ph xmm30, word ptr [r9]{1to8}

// CHECK: vcvtw2ph xmm30, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0x62,0x65,0x7e,0x08,0x7d,0x71,0x7f]
vcvtw2ph xmm30, xmmword ptr [rcx + 2032]

// CHECK: vcvtw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x7e,0x9f,0x7d,0x72,0x80]
vcvtw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}

// CHECK: vcvtw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0x62,0x25,0x7e,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
vcvtw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vcvtw2ph ymm30, word ptr [r9]{1to16}
// CHECK: encoding: [0x62,0x45,0x7e,0x38,0x7d,0x31]
vcvtw2ph ymm30, word ptr [r9]{1to16}

// CHECK: vcvtw2ph ymm30, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0x62,0x65,0x7e,0x28,0x7d,0x71,0x7f]
vcvtw2ph ymm30, ymmword ptr [rcx + 4064]

// CHECK: vcvtw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
// CHECK: encoding: [0x62,0x65,0x7e,0xbf,0x7d,0x72,0x80]
vcvtw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}