forked from OSchip/llvm-project
[X86] Support Intel avxvnni
This patch mainly made the following changes: 1. Support AVX-VNNI instructions; 2. Introduce ExplicitVEXPrefix flag so that vpdpbusd/vpdpbusds/vpdpbusds/vpdpbusds instructions only use vex-encoding when user explicity add {vex} prefix. Differential Revision: https://reviews.llvm.org/D89105
This commit is contained in:
parent
d11710dae6
commit
756f597841
|
@ -3253,6 +3253,8 @@ X86
|
|||
|
||||
.. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq
|
||||
|
||||
.. option:: -mavxvnni, -mno-avxvnni
|
||||
|
||||
.. option:: -mbmi, -mno-bmi
|
||||
|
||||
.. option:: -mbmi2, -mno-bmi2
|
||||
|
|
|
@ -205,6 +205,8 @@ X86 Support in Clang
|
|||
|
||||
- Support for ``UINTR`` instructions has been added.
|
||||
|
||||
- Support for ``AVXVNNI`` instructions has been added.
|
||||
|
||||
Internal API Changes
|
||||
--------------------
|
||||
|
||||
|
|
|
@ -960,17 +960,17 @@ TARGET_BUILTIN(__builtin_ia32_alignq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl
|
|||
TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "ncV:512:", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "ncV:512:", "avx512f")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2dvC*V2OiUcIi", "nV:128:", "avx512vl")
|
||||
|
|
|
@ -3235,6 +3235,8 @@ def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Gro
|
|||
def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
|
||||
def mavx512vp2intersect : Flag<["-"], "mavx512vp2intersect">, Group<m_x86_Features_Group>;
|
||||
def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group<m_x86_Features_Group>;
|
||||
def mavxvnni : Flag<["-"], "mavxvnni">, Group<m_x86_Features_Group>;
|
||||
def mno_avxvnni : Flag<["-"], "mno-avxvnni">, Group<m_x86_Features_Group>;
|
||||
def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>;
|
||||
def mno_adx : Flag<["-"], "mno-adx">, Group<m_x86_Features_Group>;
|
||||
def maes : Flag<["-"], "maes">, Group<m_x86_Features_Group>;
|
||||
|
|
|
@ -306,6 +306,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
|||
HasAMXINT8 = true;
|
||||
} else if (Feature == "+amx-tile") {
|
||||
HasAMXTILE = true;
|
||||
} else if (Feature == "+avxvnni") {
|
||||
HasAVXVNNI = true;
|
||||
} else if (Feature == "+serialize") {
|
||||
HasSERIALIZE = true;
|
||||
} else if (Feature == "+tsxldtrk") {
|
||||
|
@ -728,6 +730,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
Builder.defineMacro("__AMXINT8__");
|
||||
if (HasAMXBF16)
|
||||
Builder.defineMacro("__AMXBF16__");
|
||||
if (HasAVXVNNI)
|
||||
Builder.defineMacro("__AVXVNNI__");
|
||||
if (HasSERIALIZE)
|
||||
Builder.defineMacro("__SERIALIZE__");
|
||||
if (HasTSXLDTRK)
|
||||
|
@ -846,6 +850,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
|
|||
.Case("avx512vbmi2", true)
|
||||
.Case("avx512ifma", true)
|
||||
.Case("avx512vp2intersect", true)
|
||||
.Case("avxvnni", true)
|
||||
.Case("bmi", true)
|
||||
.Case("bmi2", true)
|
||||
.Case("cldemote", true)
|
||||
|
@ -918,6 +923,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
|
|||
.Case("amx-bf16", HasAMXBF16)
|
||||
.Case("amx-int8", HasAMXINT8)
|
||||
.Case("amx-tile", HasAMXTILE)
|
||||
.Case("avxvnni", HasAVXVNNI)
|
||||
.Case("avx", SSELevel >= AVX)
|
||||
.Case("avx2", SSELevel >= AVX2)
|
||||
.Case("avx512f", SSELevel >= AVX512F)
|
||||
|
|
|
@ -130,6 +130,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
|
|||
bool HasKL = false; // For key locker
|
||||
bool HasWIDEKL = false; // For wide key locker
|
||||
bool HasHRESET = false;
|
||||
bool HasAVXVNNI = false;
|
||||
bool HasAMXTILE = false;
|
||||
bool HasAMXINT8 = false;
|
||||
bool HasAMXBF16 = false;
|
||||
|
|
|
@ -35,6 +35,7 @@ set(files
|
|||
avx512vnniintrin.h
|
||||
avx512vlvnniintrin.h
|
||||
avxintrin.h
|
||||
avxvnniintrin.h
|
||||
bmi2intrin.h
|
||||
bmiintrin.h
|
||||
__clang_cuda_builtin_vars.h
|
||||
|
|
|
@ -18,13 +18,157 @@
|
|||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
|
||||
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
|
||||
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
|
||||
/// in \a S, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPBUSD </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 7
|
||||
/// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j]))
|
||||
/// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1]))
|
||||
/// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2]))
|
||||
/// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3]))
|
||||
/// DST.dword[j] := S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
|
||||
/// ENDFOR
|
||||
/// DST[MAX:256] := 0
|
||||
/// \endoperation
|
||||
#define _mm256_dpbusd_epi32(S, A, B) \
|
||||
(__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A,
|
||||
(__v8si)__B);
|
||||
}
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
|
||||
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
|
||||
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
|
||||
/// in \a S using signed saturation, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPBUSDS </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 7
|
||||
/// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j]))
|
||||
/// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1]))
|
||||
/// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2]))
|
||||
/// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3]))
|
||||
/// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
|
||||
/// ENDFOR
|
||||
/// DST[MAX:256] := 0
|
||||
/// \endoperation
|
||||
#define _mm256_dpbusds_epi32(S, A, B) \
|
||||
(__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B))
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
|
||||
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
|
||||
/// results. Sum these 2 results with the corresponding 32-bit integer in \a S,
|
||||
/// and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPWSSD </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 7
|
||||
/// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j])
|
||||
/// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1])
|
||||
/// DST.dword[j] := S.dword[j] + tmp1 + tmp2
|
||||
/// ENDFOR
|
||||
/// DST[MAX:256] := 0
|
||||
/// \endoperation
|
||||
#define _mm256_dpwssd_epi32(S, A, B) \
|
||||
(__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B))
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
|
||||
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
|
||||
/// results. Sum these 2 results with the corresponding 32-bit integer in \a S
|
||||
/// using signed saturation, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPWSSDS </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 7
|
||||
/// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j])
|
||||
/// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1])
|
||||
/// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2)
|
||||
/// ENDFOR
|
||||
/// DST[MAX:256] := 0
|
||||
/// \endoperation
|
||||
#define _mm256_dpwssds_epi32(S, A, B) \
|
||||
(__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B))
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
|
||||
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
|
||||
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
|
||||
/// in \a S, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPBUSD </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 3
|
||||
/// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j]))
|
||||
/// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1]))
|
||||
/// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2]))
|
||||
/// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3]))
|
||||
/// DST.dword[j] := S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
|
||||
/// ENDFOR
|
||||
/// DST[MAX:128] := 0
|
||||
/// \endoperation
|
||||
#define _mm_dpbusd_epi32(S, A, B) \
|
||||
(__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B))
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
|
||||
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
|
||||
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
|
||||
/// in \a S using signed saturation, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPBUSDS </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 3
|
||||
/// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j]))
|
||||
/// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1]))
|
||||
/// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2]))
|
||||
/// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3]))
|
||||
/// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
|
||||
/// ENDFOR
|
||||
/// DST[MAX:128] := 0
|
||||
/// \endoperation
|
||||
#define _mm_dpbusds_epi32(S, A, B) \
|
||||
(__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B))
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
|
||||
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
|
||||
/// results. Sum these 2 results with the corresponding 32-bit integer in \a S,
|
||||
/// and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPWSSD </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 3
|
||||
/// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j])
|
||||
/// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1])
|
||||
/// DST.dword[j] := S.dword[j] + tmp1 + tmp2
|
||||
/// ENDFOR
|
||||
/// DST[MAX:128] := 0
|
||||
/// \endoperation
|
||||
#define _mm_dpwssd_epi32(S, A, B) \
|
||||
(__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B))
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
|
||||
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
|
||||
/// results. Sum these 2 results with the corresponding 32-bit integer in \a S
|
||||
/// using signed saturation, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPWSSDS </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 3
|
||||
/// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j])
|
||||
/// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1])
|
||||
/// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2)
|
||||
/// ENDFOR
|
||||
/// DST[MAX:128] := 0
|
||||
/// \endoperation
|
||||
#define _mm_dpwssds_epi32(S, A, B) \
|
||||
(__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
|
@ -42,13 +186,6 @@ _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
|||
(__v8si)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A,
|
||||
(__v8si)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
|
@ -65,13 +202,6 @@ _mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
|||
(__v8si)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A,
|
||||
(__v8si)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
|
@ -88,13 +218,6 @@ _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
|||
(__v8si)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A,
|
||||
(__v8si)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
|
@ -111,13 +234,6 @@ _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
|||
(__v8si)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A,
|
||||
(__v4si)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
|
@ -134,13 +250,6 @@ _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
|||
(__v4si)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A,
|
||||
(__v4si)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
|
@ -157,13 +266,6 @@ _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
|||
(__v4si)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A,
|
||||
(__v4si)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
|
@ -180,13 +282,6 @@ _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
|||
(__v4si)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A,
|
||||
(__v4si)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,225 @@
|
|||
/*===--------------- avxvnniintrin.h - VNNI intrinsics --------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVXVNNIINTRIN_H
|
||||
#define __AVXVNNIINTRIN_H
|
||||
|
||||
/* Below intrinsics defined in avx512vlvnniintrin.h can be used for AVXVNNI */
|
||||
/// \fn __m256i _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
/// \fn __m256i _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
/// \fn __m256i _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
/// \fn __m256i _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
/// \fn __m128i _mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
/// \fn __m128i _mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
/// \fn __m128i _mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
/// \fn __m128i _mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
|
||||
/* Intrinsics with _avx_ prefix are for compatibility with msvc. */
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256)))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128)))
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
|
||||
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed
|
||||
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
|
||||
/// in \a __S, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPBUSD </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 7
|
||||
/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]))
|
||||
/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]))
|
||||
/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]))
|
||||
/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]))
|
||||
/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
|
||||
/// ENDFOR
|
||||
/// DST[MAX:256] := 0
|
||||
/// \endoperation
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B);
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
|
||||
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed
|
||||
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
|
||||
/// in \a __S using signed saturation, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPBUSDS </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 7
|
||||
/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]))
|
||||
/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]))
|
||||
/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]))
|
||||
/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]))
|
||||
/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
|
||||
/// ENDFOR
|
||||
/// DST[MAX:256] := 0
|
||||
/// \endoperation
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B);
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
|
||||
/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit
|
||||
/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S,
|
||||
/// and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPWSSD </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 7
|
||||
/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
|
||||
/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
|
||||
/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2
|
||||
/// ENDFOR
|
||||
/// DST[MAX:256] := 0
|
||||
/// \endoperation
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B);
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
|
||||
/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit
|
||||
/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S
|
||||
/// using signed saturation, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPWSSDS </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 7
|
||||
/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
|
||||
/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
|
||||
/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2)
|
||||
/// ENDFOR
|
||||
/// DST[MAX:256] := 0
|
||||
/// \endoperation
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B);
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
|
||||
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed
|
||||
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
|
||||
/// in \a __S, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPBUSD </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 3
|
||||
/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]))
|
||||
/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]))
|
||||
/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]))
|
||||
/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]))
|
||||
/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4
|
||||
/// ENDFOR
|
||||
/// DST[MAX:128] := 0
|
||||
/// \endoperation
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B);
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
|
||||
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed
|
||||
/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer
|
||||
/// in \a __S using signed saturation, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPBUSDS </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 3
|
||||
/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]))
|
||||
/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]))
|
||||
/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]))
|
||||
/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]))
|
||||
/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4)
|
||||
/// ENDFOR
|
||||
/// DST[MAX:128] := 0
|
||||
/// \endoperation
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B);
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
|
||||
/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit
|
||||
/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S,
|
||||
/// and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPWSSD </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 3
|
||||
/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
|
||||
/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
|
||||
/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2
|
||||
/// ENDFOR
|
||||
/// DST[MAX:128] := 0
|
||||
/// \endoperation
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B);
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
|
||||
/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit
|
||||
/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S
|
||||
/// using signed saturation, and store the packed 32-bit results in DST.
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPDPWSSDS </c> instructions.
|
||||
///
|
||||
/// \operation
|
||||
/// FOR j := 0 to 3
|
||||
/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
|
||||
/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
|
||||
/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2)
|
||||
/// ENDFOR
|
||||
/// DST[MAX:128] := 0
|
||||
/// \endoperation
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS128
|
||||
#undef __DEFAULT_FN_ATTRS256
|
||||
|
||||
#endif // __AVXVNNIINTRIN_H
|
|
@ -196,6 +196,7 @@
|
|||
#define bit_AMXINT8 0x02000000
|
||||
|
||||
/* Features in %eax for leaf 7 sub-leaf 1 */
|
||||
#define bit_AVXVNNI 0x00000008
|
||||
#define bit_AVX512BF16 0x00000020
|
||||
#define bit_HRESET 0x00400000
|
||||
|
||||
|
|
|
@ -145,6 +145,11 @@
|
|||
#include <avx512vlvnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVXVNNI__)
|
||||
#include <avxvnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512DQ__)
|
||||
#include <avx512dqintrin.h>
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror | FileCheck %s
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_dpbusd_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpbusd.256
|
||||
return _mm256_dpbusd_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_dpbusds_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpbusds.256
|
||||
return _mm256_dpbusds_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_dpwssd_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpwssd.256
|
||||
return _mm256_dpwssd_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_dpwssds_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpwssds.256
|
||||
return _mm256_dpwssds_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm_dpbusd_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpbusd.128
|
||||
return _mm_dpbusd_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm_dpbusds_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpbusds.128
|
||||
return _mm_dpbusds_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm_dpwssd_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpwssd.128
|
||||
return _mm_dpwssd_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm_dpwssds_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpwssds.128
|
||||
return _mm_dpwssds_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_dpbusd_avx_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpbusd.256
|
||||
return _mm256_dpbusd_avx_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_dpbusds_avx_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpbusds.256
|
||||
return _mm256_dpbusds_avx_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_dpwssd_avx_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpwssd.256
|
||||
return _mm256_dpwssd_avx_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
|
||||
// CHECK-LABEL: @test_mm256_dpwssds_avx_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpwssds.256
|
||||
return _mm256_dpwssds_avx_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm_dpbusd_avx_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpbusd.128
|
||||
return _mm_dpbusd_avx_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm_dpbusds_avx_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpbusds.128
|
||||
return _mm_dpbusds_avx_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm_dpwssd_avx_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpwssd.128
|
||||
return _mm_dpwssd_avx_epi32(__S, __A, __B);
|
||||
}
|
||||
|
||||
__m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
|
||||
// CHECK-LABEL: @test_mm_dpwssds_avx_epi32
|
||||
// CHECK: @llvm.x86.avx512.vpdpwssds.128
|
||||
return _mm_dpwssds_avx_epi32(__S, __A, __B);
|
||||
}
|
|
@ -54,9 +54,9 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4() {}
|
|||
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686"
|
||||
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
|
||||
// CHECK-NOT: tune-cpu
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
|
||||
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
|
||||
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
|
||||
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
|
||||
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
|
||||
// CHECK-NOT: tune-cpu
|
||||
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
|
||||
|
|
|
@ -288,3 +288,8 @@
|
|||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-uintr %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-UINTR %s
|
||||
// UINTR: "-target-feature" "+uintr"
|
||||
// NO-UINTR: "-target-feature" "-uintr"
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavxvnni %s -### -o %t.o 2>&1 | FileCheck --check-prefix=AVX-VNNI %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avxvnni %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AVX-VNNI %s
|
||||
// AVX-VNNI: "-target-feature" "+avxvnni"
|
||||
// NO-AVX-VNNI: "-target-feature" "-avxvnni"
|
||||
|
|
|
@ -1654,6 +1654,7 @@
|
|||
// CHECK_SPR_M32: #define __AVX512VL__ 1
|
||||
// CHECK_SPR_M32: #define __AVX512VNNI__ 1
|
||||
// CHECK_SPR_M32: #define __AVX512VPOPCNTDQ__ 1
|
||||
// CHECK_SPR_M32: #define __AVXVNNI__ 1
|
||||
// CHECK_SPR_M32: #define __AVX__ 1
|
||||
// CHECK_SPR_M32: #define __BMI2__ 1
|
||||
// CHECK_SPR_M32: #define __BMI__ 1
|
||||
|
@ -1724,6 +1725,7 @@
|
|||
// CHECK_SPR_M64: #define __AVX512VL__ 1
|
||||
// CHECK_SPR_M64: #define __AVX512VNNI__ 1
|
||||
// CHECK_SPR_M64: #define __AVX512VPOPCNTDQ__ 1
|
||||
// CHECK_SPR_M64: #define __AVXVNNI__ 1
|
||||
// CHECK_SPR_M64: #define __AVX__ 1
|
||||
// CHECK_SPR_M64: #define __BMI2__ 1
|
||||
// CHECK_SPR_M64: #define __BMI__ 1
|
||||
|
@ -1782,6 +1784,7 @@
|
|||
// CHECK_ADL_M32: #define __AES__ 1
|
||||
// CHECK_ADL_M32: #define __AVX2__ 1
|
||||
// CHECK_ADL_M32-NOT: AVX512
|
||||
// CHECK_ADL_M32: #define __AVXVNNI__ 1
|
||||
// CHECK_ADL_M32: #define __AVX__ 1
|
||||
// CHECK_ADL_M32: #define __BMI2__ 1
|
||||
// CHECK_ADL_M32: #define __BMI__ 1
|
||||
|
@ -1822,6 +1825,7 @@
|
|||
// CHECK_ADL_M64: #define __AES__ 1
|
||||
// CHECK_ADL_M64: #define __AVX2__ 1
|
||||
// CHECK_ADL_M64-NOT: AVX512
|
||||
// CHECK_ADL_M64: #define __AVXVNNI__ 1
|
||||
// CHECK_ADL_M64: #define __AVX__ 1
|
||||
// CHECK_ADL_M64: #define __BMI2__ 1
|
||||
// CHECK_ADL_M64: #define __BMI__ 1
|
||||
|
|
|
@ -544,3 +544,17 @@
|
|||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-uintr -x c -E -dM -o - %s | FileCheck -check-prefix=NOUINTR %s
|
||||
|
||||
// NOUINTR-NOT: #define __UINTR__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNI %s
|
||||
|
||||
// AVXVNNI: #define __AVX2__ 1
|
||||
// AVXVNNI: #define __AVXVNNI__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -mno-avxvnni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOAVXVNNI %s
|
||||
|
||||
// NOAVXVNNI-NOT: #define __AVXVNNI__ 1
|
||||
|
||||
// RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnni -mno-avx2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNINOAVX2 %s
|
||||
|
||||
// AVXVNNINOAVX2-NOT: #define __AVX2__ 1
|
||||
// AVXVNNINOAVX2-NOT: #define __AVXVNNI__ 1
|
||||
|
|
|
@ -121,6 +121,7 @@ During this release ...
|
|||
the target CPU.
|
||||
* Support for ``HRESET`` instructions has been added.
|
||||
* Support for ``UINTR`` instructions has been added.
|
||||
* Support for ``AVXVNNI`` instructions has been added.
|
||||
|
||||
Changes to the AMDGPU Target
|
||||
-----------------------------
|
||||
|
|
|
@ -190,6 +190,7 @@ X86_FEATURE (XSAVEC, "xsavec")
|
|||
X86_FEATURE (XSAVEOPT, "xsaveopt")
|
||||
X86_FEATURE (XSAVES, "xsaves")
|
||||
X86_FEATURE (HRESET, "hreset")
|
||||
X86_FEATURE (AVXVNNI, "avxvnni")
|
||||
// These features aren't really CPU features, but the frontend can set them.
|
||||
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
|
||||
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
|
||||
|
|
|
@ -1497,6 +1497,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
|
|||
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
|
||||
bool HasLeaf7Subleaf1 =
|
||||
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
|
||||
Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
|
||||
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
|
||||
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
|
||||
|
||||
|
|
|
@ -205,10 +205,10 @@ constexpr FeatureBitset FeaturesSapphireRapids =
|
|||
FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE |
|
||||
FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
|
||||
FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
|
||||
FeatureWAITPKG;
|
||||
FeatureWAITPKG | FeatureAVXVNNI;
|
||||
constexpr FeatureBitset FeaturesAlderlake =
|
||||
FeaturesSkylakeClient | FeatureCLDEMOTE | FeatureHRESET | FeaturePTWRITE |
|
||||
FeatureSERIALIZE | FeatureWAITPKG;
|
||||
FeatureSERIALIZE | FeatureWAITPKG | FeatureAVXVNNI;
|
||||
|
||||
// Intel Atom processors.
|
||||
// Bonnell has feature parity with Core2 and adds MOVBE.
|
||||
|
@ -575,6 +575,9 @@ constexpr FeatureBitset ImpliedFeaturesHRESET = {};
|
|||
constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2;
|
||||
constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
|
||||
|
||||
// AVXVNNI Features
|
||||
constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2;
|
||||
|
||||
constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
|
||||
#define X86_FEATURE(ENUM, STR) {{STR}, ImpliedFeatures##ENUM},
|
||||
#include "llvm/Support/X86TargetParser.def"
|
||||
|
|
|
@ -3845,6 +3845,13 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
|
|||
(MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
|
||||
return Match_Unsupported;
|
||||
|
||||
// These instructions are only available with {vex}, {vex2} or {vex3} prefix
|
||||
if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
|
||||
(ForcedVEXEncoding != VEXEncoding_VEX &&
|
||||
ForcedVEXEncoding != VEXEncoding_VEX2 &&
|
||||
ForcedVEXEncoding != VEXEncoding_VEX3))
|
||||
return Match_Unsupported;
|
||||
|
||||
// These instructions match ambiguously with their VEX encoded counterparts
|
||||
// and appear first in the matching table. Reject them unless we're forcing
|
||||
// EVEX encoding.
|
||||
|
|
|
@ -952,7 +952,11 @@ namespace X86II {
|
|||
|
||||
// NOTRACK prefix
|
||||
NoTrackShift = EVEX_RCShift + 1,
|
||||
NOTRACK = 1ULL << NoTrackShift
|
||||
NOTRACK = 1ULL << NoTrackShift,
|
||||
|
||||
// Force VEX encoding
|
||||
ExplicitVEXShift = NoTrackShift + 1,
|
||||
ExplicitVEXPrefix = 1ULL << ExplicitVEXShift
|
||||
};
|
||||
|
||||
/// \returns true if the instruction with given opcode is a prefix.
|
||||
|
|
|
@ -348,7 +348,7 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
|
|||
O << "\trep\t";
|
||||
|
||||
// These all require a pseudo prefix
|
||||
if (Flags & X86::IP_USE_VEX)
|
||||
if ((Flags & X86::IP_USE_VEX) || (TSFlags & X86II::ExplicitVEXPrefix))
|
||||
O << "\t{vex}";
|
||||
else if (Flags & X86::IP_USE_VEX2)
|
||||
O << "\t{vex2}";
|
||||
|
|
|
@ -171,6 +171,9 @@ def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
|
|||
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
|
||||
"Enable AVX-512 Vector Neural Network Instructions",
|
||||
[FeatureAVX512]>;
|
||||
def FeatureAVXVNNI : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
|
||||
"Support AVX_VNNI encoding",
|
||||
[FeatureAVX2]>;
|
||||
def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
|
||||
"Support bfloat16 floating point",
|
||||
[FeatureBWI]>;
|
||||
|
@ -769,6 +772,7 @@ def ProcessorFeatures {
|
|||
FeatureCLDEMOTE,
|
||||
FeatureWAITPKG,
|
||||
FeaturePTWRITE,
|
||||
FeatureAVXVNNI,
|
||||
FeatureTSXLDTRK,
|
||||
FeatureENQCMD,
|
||||
FeatureSHSTK,
|
||||
|
@ -781,7 +785,8 @@ def ProcessorFeatures {
|
|||
!listconcat(ICXFeatures, SPRAdditionalFeatures);
|
||||
|
||||
// Alderlake
|
||||
list<SubtargetFeature> ADLAdditionalFeatures = [FeatureCLDEMOTE,
|
||||
list<SubtargetFeature> ADLAdditionalFeatures = [FeatureAVXVNNI,
|
||||
FeatureCLDEMOTE,
|
||||
FeatureHRESET,
|
||||
FeaturePTWRITE,
|
||||
FeatureSERIALIZE,
|
||||
|
|
|
@ -85,6 +85,8 @@ public:
|
|||
private:
|
||||
/// Machine instruction info used throughout the class.
|
||||
const X86InstrInfo *TII = nullptr;
|
||||
|
||||
const X86Subtarget *ST = nullptr;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
@ -94,8 +96,8 @@ char EvexToVexInstPass::ID = 0;
|
|||
bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
|
||||
|
||||
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
|
||||
if (!ST.hasAVX512())
|
||||
ST = &MF.getSubtarget<X86Subtarget>();
|
||||
if (!ST->hasAVX512())
|
||||
return false;
|
||||
|
||||
bool Changed = false;
|
||||
|
@ -144,10 +146,29 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
|
|||
}
|
||||
|
||||
// Do any custom cleanup needed to finalize the conversion.
|
||||
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
|
||||
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
|
||||
const X86Subtarget *ST) {
|
||||
(void)NewOpc;
|
||||
unsigned Opc = MI.getOpcode();
|
||||
switch (Opc) {
|
||||
case X86::VPDPBUSDSZ256m:
|
||||
case X86::VPDPBUSDSZ256r:
|
||||
case X86::VPDPBUSDSZ128m:
|
||||
case X86::VPDPBUSDSZ128r:
|
||||
case X86::VPDPBUSDZ256m:
|
||||
case X86::VPDPBUSDZ256r:
|
||||
case X86::VPDPBUSDZ128m:
|
||||
case X86::VPDPBUSDZ128r:
|
||||
case X86::VPDPWSSDSZ256m:
|
||||
case X86::VPDPWSSDSZ256r:
|
||||
case X86::VPDPWSSDSZ128m:
|
||||
case X86::VPDPWSSDSZ128r:
|
||||
case X86::VPDPWSSDZ256m:
|
||||
case X86::VPDPWSSDZ256r:
|
||||
case X86::VPDPWSSDZ128m:
|
||||
case X86::VPDPWSSDZ128r:
|
||||
// These can only VEX convert if AVXVNNI is enabled.
|
||||
return ST->hasAVXVNNI();
|
||||
case X86::VALIGNDZ128rri:
|
||||
case X86::VALIGNDZ128rmi:
|
||||
case X86::VALIGNQZ128rri:
|
||||
|
@ -259,7 +280,7 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
|
|||
if (usesExtendedRegister(MI))
|
||||
return false;
|
||||
|
||||
if (!performCustomAdjustments(MI, NewOpc))
|
||||
if (!performCustomAdjustments(MI, NewOpc, ST))
|
||||
return false;
|
||||
|
||||
MI.setDesc(TII->get(NewOpc));
|
||||
|
|
|
@ -3748,18 +3748,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 },
|
||||
{ X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 },
|
||||
{ X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 },
|
||||
{ X86::VPDPBUSDSYrr, X86::VPDPBUSDSYrm, 0 },
|
||||
{ X86::VPDPBUSDSZ128r, X86::VPDPBUSDSZ128m, 0 },
|
||||
{ X86::VPDPBUSDSZ256r, X86::VPDPBUSDSZ256m, 0 },
|
||||
{ X86::VPDPBUSDSZr, X86::VPDPBUSDSZm, 0 },
|
||||
{ X86::VPDPBUSDSrr, X86::VPDPBUSDSrm, 0 },
|
||||
{ X86::VPDPBUSDYrr, X86::VPDPBUSDYrm, 0 },
|
||||
{ X86::VPDPBUSDZ128r, X86::VPDPBUSDZ128m, 0 },
|
||||
{ X86::VPDPBUSDZ256r, X86::VPDPBUSDZ256m, 0 },
|
||||
{ X86::VPDPBUSDZr, X86::VPDPBUSDZm, 0 },
|
||||
{ X86::VPDPBUSDrr, X86::VPDPBUSDrm, 0 },
|
||||
{ X86::VPDPWSSDSYrr, X86::VPDPWSSDSYrm, 0 },
|
||||
{ X86::VPDPWSSDSZ128r, X86::VPDPWSSDSZ128m, 0 },
|
||||
{ X86::VPDPWSSDSZ256r, X86::VPDPWSSDSZ256m, 0 },
|
||||
{ X86::VPDPWSSDSZr, X86::VPDPWSSDSZm, 0 },
|
||||
{ X86::VPDPWSSDSrr, X86::VPDPWSSDSrm, 0 },
|
||||
{ X86::VPDPWSSDYrr, X86::VPDPWSSDYrm, 0 },
|
||||
{ X86::VPDPWSSDZ128r, X86::VPDPWSSDZ128m, 0 },
|
||||
{ X86::VPDPWSSDZ256r, X86::VPDPWSSDZ256m, 0 },
|
||||
{ X86::VPDPWSSDZr, X86::VPDPWSSDZm, 0 },
|
||||
{ X86::VPDPWSSDrr, X86::VPDPWSSDrm, 0 },
|
||||
{ X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
|
||||
{ X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
|
||||
{ X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
|
||||
|
|
|
@ -264,6 +264,9 @@ class NotMemoryFoldable { bit isMemoryFoldable = 0; }
|
|||
// Prevent EVEX->VEX conversion from considering this instruction.
|
||||
class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
|
||||
|
||||
// Force the instruction to use VEX encoding.
|
||||
class ExplicitVEXPrefix { bit ExplicitVEXPrefix = 1; }
|
||||
|
||||
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
|
||||
string AsmStr, Domain d = GenericDomain>
|
||||
: Instruction {
|
||||
|
@ -348,6 +351,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
|
|||
|
||||
bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction?
|
||||
bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
|
||||
bit ExplicitVEXPrefix = 0; // Force the instruction to use VEX encoding.
|
||||
|
||||
// TSFlags layout should be kept in sync with X86BaseInfo.h.
|
||||
let TSFlags{6-0} = FormBits;
|
||||
|
@ -376,6 +380,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
|
|||
let TSFlags{51-45} = CD8_Scale;
|
||||
let TSFlags{52} = hasEVEX_RC;
|
||||
let TSFlags{53} = hasNoTrackPrefix;
|
||||
let TSFlags{54} = ExplicitVEXPrefix;
|
||||
}
|
||||
|
||||
class PseudoI<dag oops, dag iops, list<dag> pattern>
|
||||
|
|
|
@ -2568,6 +2568,10 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
|
|||
case X86::VPTERNLOGQZ256rmbikz:
|
||||
case X86::VPTERNLOGQZrmbikz:
|
||||
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
|
||||
case X86::VPDPWSSDYrr:
|
||||
case X86::VPDPWSSDrr:
|
||||
case X86::VPDPWSSDSYrr:
|
||||
case X86::VPDPWSSDSrr:
|
||||
case X86::VPDPWSSDZ128r:
|
||||
case X86::VPDPWSSDZ128rk:
|
||||
case X86::VPDPWSSDZ128rkz:
|
||||
|
|
|
@ -910,6 +910,8 @@ def PKU : Predicate<"Subtarget->hasPKU()">;
|
|||
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
|
||||
def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
|
||||
def HasBF16 : Predicate<"Subtarget->hasBF16()">;
|
||||
def HasAVXVNNI : Predicate <"Subtarget->hasAVXVNNI()">;
|
||||
def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">;
|
||||
|
||||
def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
|
||||
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
||||
|
|
|
@ -7164,6 +7164,48 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
|
|||
int_x86_avx_maskstore_pd_256,
|
||||
WriteFMaskMove64, WriteFMaskMove64Y>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX_VNNI
|
||||
//===----------------------------------------------------------------------===//
|
||||
let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst" in
|
||||
multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
bit IsCommutable> {
|
||||
let isCommutable = IsCommutable in
|
||||
def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
|
||||
VR128:$src2, VR128:$src3)))]>,
|
||||
VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
|
||||
|
||||
def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
|
||||
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
|
||||
(loadv4i32 addr:$src3))))]>,
|
||||
VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
|
||||
|
||||
let isCommutable = IsCommutable in
|
||||
def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
|
||||
(ins VR256:$src1, VR256:$src2, VR256:$src3),
|
||||
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
|
||||
VR256:$src2, VR256:$src3)))]>,
|
||||
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
|
||||
|
||||
def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
|
||||
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
|
||||
(loadv8i32 addr:$src3))))]>,
|
||||
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
|
||||
}
|
||||
|
||||
defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>, ExplicitVEXPrefix;
|
||||
defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>, ExplicitVEXPrefix;
|
||||
defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>, ExplicitVEXPrefix;
|
||||
defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>, ExplicitVEXPrefix;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VPERMIL - Permute Single and Double Floating-Point Values
|
||||
//
|
||||
|
|
|
@ -355,6 +355,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
|
|||
/// Processor has AVX-512 Vector Neural Network Instructions
|
||||
bool HasVNNI = false;
|
||||
|
||||
/// Processor has AVX Vector Neural Network Instructions
|
||||
bool HasAVXVNNI = false;
|
||||
|
||||
/// Processor has AVX-512 bfloat16 floating-point extensions
|
||||
bool HasBF16 = false;
|
||||
|
||||
|
@ -750,6 +753,7 @@ public:
|
|||
bool useRetpolineIndirectBranches() const {
|
||||
return UseRetpolineIndirectBranches;
|
||||
}
|
||||
bool hasAVXVNNI() const { return HasAVXVNNI; }
|
||||
bool hasAMXTILE() const { return HasAMXTILE; }
|
||||
bool hasAMXBF16() const { return HasAMXBF16; }
|
||||
bool hasAMXINT8() const { return HasAMXINT8; }
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=AVXVNNI
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni,+avx512vl,+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=AVX512VNNI
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=AVXVNNI
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=AVX512VNNI
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
|
||||
; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusd_256:
|
||||
; AVXVNNI: # %bb.0:
|
||||
; AVXVNNI-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x50,0xc2]
|
||||
; AVXVNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VNNI-LABEL: test_int_x86_avx_vpdpbusd_256:
|
||||
; AVX512VNNI: # %bb.0:
|
||||
; AVX512VNNI-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x50,0xc2]
|
||||
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
|
||||
; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusd_128:
|
||||
; AVXVNNI: # %bb.0:
|
||||
; AVXVNNI-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x50,0xc2]
|
||||
; AVXVNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VNNI-LABEL: test_int_x86_avx_vpdpbusd_128:
|
||||
; AVX512VNNI: # %bb.0:
|
||||
; AVX512VNNI-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x50,0xc2]
|
||||
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
|
||||
; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusds_256:
|
||||
; AVXVNNI: # %bb.0:
|
||||
; AVXVNNI-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x51,0xc2]
|
||||
; AVXVNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VNNI-LABEL: test_int_x86_avx_vpdpbusds_256:
|
||||
; AVX512VNNI: # %bb.0:
|
||||
; AVX512VNNI-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x51,0xc2]
|
||||
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
|
||||
; AVXVNNI-LABEL: test_int_x86_avx_vpdpbusds_128:
|
||||
; AVXVNNI: # %bb.0:
|
||||
; AVXVNNI-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x51,0xc2]
|
||||
; AVXVNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VNNI-LABEL: test_int_x86_avx_vpdpbusds_128:
|
||||
; AVX512VNNI: # %bb.0:
|
||||
; AVX512VNNI-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x51,0xc2]
|
||||
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
|
||||
; AVXVNNI-LABEL: test_int_x86_avx_vpdpwssd_256:
|
||||
; AVXVNNI: # %bb.0:
|
||||
; AVXVNNI-NEXT: {vex} vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x52,0xc2]
|
||||
; AVXVNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VNNI-LABEL: test_int_x86_avx_vpdpwssd_256:
|
||||
; AVX512VNNI: # %bb.0:
|
||||
; AVX512VNNI-NEXT: {vex} vpdpwssd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x52,0xc2]
|
||||
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
|
||||
; AVXVNNI-LABEL: test_int_x86_avx_vpdpwssd_128:
|
||||
; AVXVNNI: # %bb.0:
|
||||
; AVXVNNI-NEXT: {vex} vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x52,0xc2]
|
||||
; AVXVNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VNNI-LABEL: test_int_x86_avx_vpdpwssd_128:
|
||||
; AVX512VNNI: # %bb.0:
|
||||
; AVX512VNNI-NEXT: {vex} vpdpwssd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x52,0xc2]
|
||||
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
|
||||
; AVXVNNI-LABEL: test_int_x86_avx_vpdpwssds_256:
|
||||
; AVXVNNI: # %bb.0:
|
||||
; AVXVNNI-NEXT: {vex} vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x53,0xc2]
|
||||
; AVXVNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VNNI-LABEL: test_int_x86_avx_vpdpwssds_256:
|
||||
; AVX512VNNI: # %bb.0:
|
||||
; AVX512VNNI-NEXT: {vex} vpdpwssds %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x53,0xc2]
|
||||
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
|
||||
; AVXVNNI-LABEL: test_int_x86_avx_vpdpwssds_128:
|
||||
; AVXVNNI: # %bb.0:
|
||||
; AVXVNNI-NEXT: {vex} vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x53,0xc2]
|
||||
; AVXVNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VNNI-LABEL: test_int_x86_avx_vpdpwssds_128:
|
||||
; AVX512VNNI: # %bb.0:
|
||||
; AVX512VNNI-NEXT: {vex} vpdpwssds %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x53,0xc2]
|
||||
; AVX512VNNI-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
|
||||
ret <4 x i32> %res
|
||||
}
|
|
@ -0,0 +1,242 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avxvnni < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>)
|
||||
|
||||
define <4 x i32> @stack_fold_vpdpwssd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpwssd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpwssd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_vpdpwssd_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpwssd_commuted:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpwssd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i32> @stack_fold_vpdpwssd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpwssd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpwssd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i32> @stack_fold_vpdpwssd_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpwssd_256_commuted:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpwssd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_vpdpwssds(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpwssds:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpwssds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_vpdpwssds_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpwssds_commuted:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpwssds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i32> @stack_fold_vpdpwssds_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpwssds_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpwssds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i32> @stack_fold_vpdpwssds_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpwssds_256_commuted:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpwssds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_vpdpbusd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpbusd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpbusd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_vpdpbusd_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpbusd_commuted:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
|
||||
; CHECK-NEXT: {vex} vpdpbusd %xmm1, %xmm2, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i32> @stack_fold_vpdpbusd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpbusd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpbusd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i32> @stack_fold_vpdpbusd_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpbusd_256_commuted:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
|
||||
; CHECK-NEXT: {vex} vpdpbusd %ymm1, %ymm2, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_vpdpbusds(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpbusds:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpbusds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <4 x i32> @stack_fold_vpdpbusds_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpbusds_commuted:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
|
||||
; CHECK-NEXT: {vex} vpdpbusds %xmm1, %xmm2, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i32> @stack_fold_vpdpbusds_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpbusds_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: {vex} vpdpbusds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2)
|
||||
ret <8 x i32> %2
|
||||
}
|
||||
|
||||
define <8 x i32> @stack_fold_vpdpbusds_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
|
||||
; CHECK-LABEL: stack_fold_vpdpbusds_256_commuted:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
|
||||
; CHECK-NEXT: #APP
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: #NO_APP
|
||||
; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
|
||||
; CHECK-NEXT: {vex} vpdpbusds %ymm1, %ymm2, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1)
|
||||
ret <8 x i32> %2
|
||||
}
|
|
@ -0,0 +1,170 @@
|
|||
# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
|
||||
|
||||
# CHECK: {vex} vpdpbusd %ymm4, %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusd %xmm4, %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusd 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusd 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd (%eax), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0x30
|
||||
|
||||
# CHECK: {vex} vpdpbusd -1024(,%ebp,2), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd 4064(%ecx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd -4096(%edx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusd 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd (%eax), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0x30
|
||||
|
||||
# CHECK: {vex} vpdpbusd -512(,%ebp,2), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd 2032(%ecx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd -2048(%edx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds %ymm4, %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusds %xmm4, %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusds 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusds 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds (%eax), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0x30
|
||||
|
||||
# CHECK: {vex} vpdpbusds -1024(,%ebp,2), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds 4064(%ecx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds -4096(%edx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusds 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds (%eax), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0x30
|
||||
|
||||
# CHECK: {vex} vpdpbusds -512(,%ebp,2), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds 2032(%ecx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds -2048(%edx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd %ymm4, %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssd %xmm4, %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssd 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssd 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd (%eax), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0x30
|
||||
|
||||
# CHECK: {vex} vpdpwssd -1024(,%ebp,2), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd 4064(%ecx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd -4096(%edx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssd 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd (%eax), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0x30
|
||||
|
||||
# CHECK: {vex} vpdpwssd -512(,%ebp,2), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd 2032(%ecx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd -2048(%edx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds %ymm4, %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssds %xmm4, %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssds 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssds 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds (%eax), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0x30
|
||||
|
||||
# CHECK: {vex} vpdpwssds -1024(,%ebp,2), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds 4064(%ecx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds -4096(%edx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssds 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds (%eax), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0x30
|
||||
|
||||
# CHECK: {vex} vpdpwssds -512(,%ebp,2), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds 2032(%ecx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds -2048(%edx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymm4
|
||||
0xc4,0xe2,0x55,0x50,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmm4
|
||||
0xc4,0xe2,0x51,0x50,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0xc4,0xe2,0x55,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
0xc4,0xe2,0x55,0x50,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [eax]
|
||||
0xc4,0xe2,0x55,0x50,0x30
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0xc4,0xe2,0x51,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
0xc4,0xe2,0x51,0x50,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [eax]
|
||||
0xc4,0xe2,0x51,0x50,0x30
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymm4
|
||||
0xc4,0xe2,0x55,0x51,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmm4
|
||||
0xc4,0xe2,0x51,0x51,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0xc4,0xe2,0x55,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
0xc4,0xe2,0x55,0x51,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [eax]
|
||||
0xc4,0xe2,0x55,0x51,0x30
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0xc4,0xe2,0x51,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
0xc4,0xe2,0x51,0x51,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [eax]
|
||||
0xc4,0xe2,0x51,0x51,0x30
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymm4
|
||||
0xc4,0xe2,0x55,0x52,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmm4
|
||||
0xc4,0xe2,0x51,0x52,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0xc4,0xe2,0x55,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
0xc4,0xe2,0x55,0x52,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [eax]
|
||||
0xc4,0xe2,0x55,0x52,0x30
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0xc4,0xe2,0x51,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
0xc4,0xe2,0x51,0x52,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [eax]
|
||||
0xc4,0xe2,0x51,0x52,0x30
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymm4
|
||||
0xc4,0xe2,0x55,0x53,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmm4
|
||||
0xc4,0xe2,0x51,0x53,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0xc4,0xe2,0x55,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
0xc4,0xe2,0x55,0x53,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [eax]
|
||||
0xc4,0xe2,0x55,0x53,0x30
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0xc4,0xe2,0x51,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
0xc4,0xe2,0x51,0x53,0xb4,0x87,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [eax]
|
||||
0xc4,0xe2,0x51,0x53,0x30
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymm4
|
||||
0xc4,0xe2,0x55,0x50,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmm4
|
||||
0xc4,0xe2,0x51,0x50,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
0xc4,0xa2,0x55,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
0xc4,0xc2,0x55,0x50,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rip]
|
||||
0xc4,0xe2,0x55,0x50,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0xc4,0xa2,0x51,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
0xc4,0xc2,0x51,0x50,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rip]
|
||||
0xc4,0xe2,0x51,0x50,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymm4
|
||||
0xc4,0xe2,0x55,0x51,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmm4
|
||||
0xc4,0xe2,0x51,0x51,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
0xc4,0xa2,0x55,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
0xc4,0xc2,0x55,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rip]
|
||||
0xc4,0xe2,0x55,0x51,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0xc4,0xa2,0x51,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
0xc4,0xc2,0x51,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rip]
|
||||
0xc4,0xe2,0x51,0x51,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymm4
|
||||
0xc4,0xe2,0x55,0x52,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmm4
|
||||
0xc4,0xe2,0x51,0x52,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
0xc4,0xa2,0x55,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
0xc4,0xc2,0x55,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rip]
|
||||
0xc4,0xe2,0x55,0x52,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0xc4,0xa2,0x51,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
0xc4,0xc2,0x51,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rip]
|
||||
0xc4,0xe2,0x51,0x52,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymm4
|
||||
0xc4,0xe2,0x55,0x53,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmm4
|
||||
0xc4,0xe2,0x51,0x53,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
0xc4,0xa2,0x55,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
0xc4,0xc2,0x55,0x53,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rip]
|
||||
0xc4,0xe2,0x55,0x53,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0xc4,0xa2,0x51,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
0xc4,0xc2,0x51,0x53,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rip]
|
||||
0xc4,0xe2,0x51,0x53,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
|
||||
|
||||
# CHECK: {vex} vpdpbusd %ymm4, %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusd %xmm4, %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusd 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
0xc4,0xa2,0x55,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusd 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
0xc4,0xc2,0x55,0x50,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd (%rip), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd -1024(,%rbp,2), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd 4064(%rcx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd -4096(%rdx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
0xc4,0xa2,0x51,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusd 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
0xc4,0xc2,0x51,0x50,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd (%rip), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd -512(,%rbp,2), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusd 2032(%rcx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusd -2048(%rdx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds %ymm4, %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusds %xmm4, %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpbusds 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
0xc4,0xa2,0x55,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusds 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
0xc4,0xc2,0x55,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds (%rip), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds -1024(,%rbp,2), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds 4064(%rcx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds -4096(%rdx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
0xc4,0xa2,0x51,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpbusds 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
0xc4,0xc2,0x51,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds (%rip), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds -512(,%rbp,2), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpbusds 2032(%rcx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpbusds -2048(%rdx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd %ymm4, %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssd %xmm4, %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssd 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
0xc4,0xa2,0x55,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssd 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
0xc4,0xc2,0x55,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd (%rip), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd -1024(,%rbp,2), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd 4064(%rcx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd -4096(%rdx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
0xc4,0xa2,0x51,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssd 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
0xc4,0xc2,0x51,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd (%rip), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd -512(,%rbp,2), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssd 2032(%rcx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssd -2048(%rdx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds %ymm4, %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssds %xmm4, %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0xf4
|
||||
|
||||
# CHECK: {vex} vpdpwssds 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
0xc4,0xa2,0x55,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssds 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
0xc4,0xc2,0x55,0x53,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds (%rip), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds -1024(,%rbp,2), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds 4064(%rcx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds -4096(%rdx), %ymm5, %ymm6
|
||||
0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
0xc4,0xa2,0x51,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# CHECK: {vex} vpdpwssds 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
0xc4,0xc2,0x51,0x53,0xb4,0x80,0x23,0x01,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds (%rip), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0x35,0x00,0x00,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds -512(,%rbp,2), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff
|
||||
|
||||
# CHECK: {vex} vpdpwssds 2032(%rcx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00
|
||||
|
||||
# CHECK: {vex} vpdpwssds -2048(%rdx), %xmm5, %xmm6
|
||||
0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff
|
||||
|
|
@ -0,0 +1,226 @@
|
|||
// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxvnni --show-encoding < %s | FileCheck %s
|
||||
|
||||
// CHECK: {vex} vpdpbusd %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xf4]
|
||||
{vex} vpdpbusd %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xf4]
|
||||
{vex} vpdpbusd %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusd 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusd 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd (%eax), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x30]
|
||||
{vex} vpdpbusd (%eax), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd -1024(,%ebp,2), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpbusd -1024(,%ebp,2), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpbusd 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd -4096(%edx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpbusd -4096(%edx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusd 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusd 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd (%eax), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x30]
|
||||
{vex} vpdpbusd (%eax), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd -512(,%ebp,2), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpbusd -512(,%ebp,2), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpbusd 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd -2048(%edx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpbusd -2048(%edx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xf4]
|
||||
{vex} vpdpbusds %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xf4]
|
||||
{vex} vpdpbusds %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusds 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusds 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds (%eax), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x30]
|
||||
{vex} vpdpbusds (%eax), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds -1024(,%ebp,2), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpbusds -1024(,%ebp,2), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpbusds 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds -4096(%edx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpbusds -4096(%edx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusds 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusds 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds (%eax), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x30]
|
||||
{vex} vpdpbusds (%eax), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds -512(,%ebp,2), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpbusds -512(,%ebp,2), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpbusds 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds -2048(%edx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpbusds -2048(%edx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssd %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xf4]
|
||||
{vex} vpdpwssd %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssd %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xf4]
|
||||
{vex} vpdpwssd %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssd 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssd 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssd 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssd 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssd (%eax), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x30]
|
||||
{vex} vpdpwssd (%eax), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssd -1024(,%ebp,2), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpwssd -1024(,%ebp,2), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssd 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpwssd 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssd -4096(%edx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpwssd -4096(%edx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssd 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssd 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssd 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssd 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssd (%eax), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x30]
|
||||
{vex} vpdpwssd (%eax), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssd -512(,%ebp,2), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpwssd -512(,%ebp,2), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssd 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpwssd 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssd -2048(%edx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpwssd -2048(%edx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssds %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xf4]
|
||||
{vex} vpdpwssds %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssds %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xf4]
|
||||
{vex} vpdpwssds %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssds 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssds 268435456(%esp,%esi,8), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssds 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssds 291(%edi,%eax,4), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssds (%eax), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x30]
|
||||
{vex} vpdpwssds (%eax), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssds -1024(,%ebp,2), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpwssds -1024(,%ebp,2), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssds 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpwssds 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssds -4096(%edx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpwssds -4096(%edx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vpdpwssds 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssds 268435456(%esp,%esi,8), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssds 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssds 291(%edi,%eax,4), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssds (%eax), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x30]
|
||||
{vex} vpdpwssds (%eax), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssds -512(,%ebp,2), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpwssds -512(,%ebp,2), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssds 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpwssds 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vpdpwssds -2048(%edx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpwssds -2048(%edx), %xmm5, %xmm6
|
||||
|
|
@ -0,0 +1,226 @@
|
|||
// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxvnni -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xf4]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymm4
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xf4]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [eax]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x30]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [eax]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [eax]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x30]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [eax]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xf4]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymm4
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xf4]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [eax]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x30]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [eax]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [eax]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x30]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [eax]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xf4]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymm4
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xf4]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [eax]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x30]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [eax]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [eax]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x30]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [eax]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xf4]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymm4
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xf4]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [eax]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x30]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [eax]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*ebp - 1024]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [edx - 4096]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb4,0x87,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [eax]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x30]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [eax]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*ebp - 512]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [edx - 2048]
|
||||
|
|
@ -0,0 +1,226 @@
|
|||
// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxvnni -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xf4]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymm4
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xf4]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xa2,0x55,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
// CHECK: encoding: [0xc4,0xc2,0x55,0x50,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rip]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [rip]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpbusd ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xa2,0x51,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
// CHECK: encoding: [0xc4,0xc2,0x51,0x50,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rip]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [rip]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xf4]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymm4
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xf4]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xa2,0x55,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
// CHECK: encoding: [0xc4,0xc2,0x55,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rip]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [rip]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpbusds ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xa2,0x51,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
// CHECK: encoding: [0xc4,0xc2,0x51,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rip]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [rip]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpbusds xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xf4]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymm4
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xf4]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xa2,0x55,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
// CHECK: encoding: [0xc4,0xc2,0x55,0x52,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rip]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [rip]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpwssd ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xa2,0x51,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
// CHECK: encoding: [0xc4,0xc2,0x51,0x52,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rip]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [rip]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpwssd xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xf4]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymm4
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xf4]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xa2,0x55,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
// CHECK: encoding: [0xc4,0xc2,0x55,0x53,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rip]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [rip]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*rbp - 1024]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpwssds ymm6, ymm5, ymmword ptr [rdx - 4096]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0xc4,0xa2,0x51,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
// CHECK: encoding: [0xc4,0xc2,0x51,0x53,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rip]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [rip]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*rbp - 512]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpwssds xmm6, xmm5, xmmword ptr [rdx - 2048]
|
||||
|
|
@ -0,0 +1,226 @@
|
|||
// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxvnni --show-encoding < %s | FileCheck %s
|
||||
|
||||
// CHECK: {vex} vpdpbusd %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xf4]
|
||||
{vex} vpdpbusd %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xf4]
|
||||
{vex} vpdpbusd %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xa2,0x55,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusd 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xc2,0x55,0x50,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusd 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd (%rip), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpbusd (%rip), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd -1024(,%rbp,2), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpbusd -1024(,%rbp,2), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 4064(%rcx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpbusd 4064(%rcx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd -4096(%rdx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpbusd -4096(%rdx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xa2,0x51,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusd 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xc2,0x51,0x50,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusd 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd (%rip), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpbusd (%rip), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd -512(,%rbp,2), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpbusd -512(,%rbp,2), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd 2032(%rcx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpbusd 2032(%rcx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusd -2048(%rdx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpbusd -2048(%rdx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xf4]
|
||||
{vex} vpdpbusds %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xf4]
|
||||
{vex} vpdpbusds %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xa2,0x55,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusds 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xc2,0x55,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusds 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds (%rip), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpbusds (%rip), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds -1024(,%rbp,2), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpbusds -1024(,%rbp,2), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 4064(%rcx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpbusds 4064(%rcx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds -4096(%rdx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpbusds -4096(%rdx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xa2,0x51,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpbusds 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xc2,0x51,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpbusds 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds (%rip), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpbusds (%rip), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds -512(,%rbp,2), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpbusds -512(,%rbp,2), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds 2032(%rcx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpbusds 2032(%rcx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpbusds -2048(%rdx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpbusds -2048(%rdx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xf4]
|
||||
{vex} vpdpwssd %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xf4]
|
||||
{vex} vpdpwssd %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xa2,0x55,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssd 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xc2,0x55,0x52,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssd 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd (%rip), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpwssd (%rip), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd -1024(,%rbp,2), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpwssd -1024(,%rbp,2), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd 4064(%rcx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpwssd 4064(%rcx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd -4096(%rdx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpwssd -4096(%rdx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xa2,0x51,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssd 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xc2,0x51,0x52,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssd 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd (%rip), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpwssd (%rip), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd -512(,%rbp,2), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpwssd -512(,%rbp,2), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd 2032(%rcx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpwssd 2032(%rcx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssd -2048(%rdx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpwssd -2048(%rdx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xf4]
|
||||
{vex} vpdpwssds %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xf4]
|
||||
{vex} vpdpwssds %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xa2,0x55,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssds 268435456(%rbp,%r14,8), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xc2,0x55,0x53,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssds 291(%r8,%rax,4), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds (%rip), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpwssds (%rip), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds -1024(,%rbp,2), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff]
|
||||
{vex} vpdpwssds -1024(,%rbp,2), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds 4064(%rcx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00]
|
||||
{vex} vpdpwssds 4064(%rcx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds -4096(%rdx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff]
|
||||
{vex} vpdpwssds -4096(%rdx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xa2,0x51,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
{vex} vpdpwssds 268435456(%rbp,%r14,8), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xc2,0x51,0x53,0xb4,0x80,0x23,0x01,0x00,0x00]
|
||||
{vex} vpdpwssds 291(%r8,%rax,4), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds (%rip), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x35,0x00,0x00,0x00,0x00]
|
||||
{vex} vpdpwssds (%rip), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds -512(,%rbp,2), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff]
|
||||
{vex} vpdpwssds -512(,%rbp,2), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds 2032(%rcx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00]
|
||||
{vex} vpdpwssds 2032(%rcx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: {vex} vpdpwssds -2048(%rdx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff]
|
||||
{vex} vpdpwssds -2048(%rdx), %xmm5, %xmm6
|
||||
|
Loading…
Reference in New Issue