forked from OSchip/llvm-project
[X86] AVX512FP16 instructions enabling 5/6
Enable FP16 FMA instructions. Reference: https://software.intel.com/content/www/us/en/develop/download/intel-avx512-fp16-architecture-specification.html. Reviewed by: LuoYuanke. Differential Revision: https://reviews.llvm.org/D105268
This commit is contained in:
parent
96ef794fd0
commit
c728bd5bba
|
@ -1995,6 +1995,25 @@ TARGET_BUILTIN(__builtin_ia32_vcvtps2phx128_mask, "V8xV4fV8xUc", "ncV:128:", "av
|
|||
TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_mask, "V8xV8fV8xUc", "ncV:256:", "avx512fp16,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddph, "V8xV8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddph256, "V16xV16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsubph, "V8xV8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256, "V16xV16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmsubaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_maskz, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_mask3, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
|
||||
TARGET_BUILTIN(__builtin_ia32_vfmsubsh3_mask3, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
|
||||
|
||||
// generic select intrinsics
|
||||
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
|
||||
|
|
|
@ -12050,6 +12050,22 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
|
|||
Intrinsic::ID IID = Intrinsic::not_intrinsic;
|
||||
switch (BuiltinID) {
|
||||
default: break;
|
||||
case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
|
||||
Subtract = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
|
||||
IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
|
||||
break;
|
||||
case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
|
||||
Subtract = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
|
||||
IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
|
||||
break;
|
||||
case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
|
||||
Subtract = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
|
@ -12113,22 +12129,30 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
|
|||
// Handle any required masking.
|
||||
Value *MaskFalseVal = nullptr;
|
||||
switch (BuiltinID) {
|
||||
case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
|
||||
MaskFalseVal = Ops[0];
|
||||
break;
|
||||
case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
|
||||
MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
|
||||
break;
|
||||
case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
|
||||
case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
|
||||
|
@ -12159,9 +12183,21 @@ static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
|
|||
Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
|
||||
Value *Res;
|
||||
if (Rnd != 4) {
|
||||
Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
|
||||
Intrinsic::x86_avx512_vfmadd_f32 :
|
||||
Intrinsic::x86_avx512_vfmadd_f64;
|
||||
Intrinsic::ID IID;
|
||||
|
||||
switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
|
||||
case 16:
|
||||
IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
|
||||
break;
|
||||
case 32:
|
||||
IID = Intrinsic::x86_avx512_vfmadd_f32;
|
||||
break;
|
||||
case 64:
|
||||
IID = Intrinsic::x86_avx512_vfmadd_f64;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unexpected size");
|
||||
}
|
||||
Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
|
||||
{Ops[0], Ops[1], Ops[2], Ops[4]});
|
||||
} else if (CGF.Builder.getIsFPConstrained()) {
|
||||
|
@ -12764,6 +12800,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
|
||||
case X86::BI__builtin_ia32_vfmaddss3:
|
||||
case X86::BI__builtin_ia32_vfmaddsd3:
|
||||
case X86::BI__builtin_ia32_vfmaddsh3_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddss3_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddsd3_mask:
|
||||
return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
|
||||
|
@ -12771,20 +12808,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
case X86::BI__builtin_ia32_vfmaddsd:
|
||||
return EmitScalarFMAExpr(*this, E, Ops,
|
||||
Constant::getNullValue(Ops[0]->getType()));
|
||||
case X86::BI__builtin_ia32_vfmaddsh3_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddss3_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddsd3_maskz:
|
||||
return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
|
||||
case X86::BI__builtin_ia32_vfmaddsh3_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddss3_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddsd3_mask3:
|
||||
return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
|
||||
case X86::BI__builtin_ia32_vfmsubsh3_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubss3_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubsd3_mask3:
|
||||
return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
|
||||
/*NegAcc*/ true);
|
||||
case X86::BI__builtin_ia32_vfmaddph:
|
||||
case X86::BI__builtin_ia32_vfmaddps:
|
||||
case X86::BI__builtin_ia32_vfmaddpd:
|
||||
case X86::BI__builtin_ia32_vfmaddph256:
|
||||
case X86::BI__builtin_ia32_vfmaddps256:
|
||||
case X86::BI__builtin_ia32_vfmaddpd256:
|
||||
case X86::BI__builtin_ia32_vfmaddph512_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddph512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddph512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddps512_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddps512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddps512_mask3:
|
||||
|
@ -12793,7 +12838,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
case X86::BI__builtin_ia32_vfmaddpd512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubpd512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubph512_mask3:
|
||||
return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
|
||||
case X86::BI__builtin_ia32_vfmaddsubph512_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddsubps512_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
|
||||
|
|
|
@ -2423,6 +2423,492 @@ _mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
|
|||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm512_fmadd_round_ph(A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask3( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_maskz( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_fmsub_round_ph(A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_maskz( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_fnmadd_round_ph(A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask( \
|
||||
(__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask3( \
|
||||
-(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_maskz( \
|
||||
-(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_fnmsub_round_ph(A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask( \
|
||||
(__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)-1, (int)(R)))
|
||||
|
||||
#define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_maskz( \
|
||||
-(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_ph(__m512h __A,
|
||||
__m512h __B,
|
||||
__m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)-1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsub_ph(__m512h __A,
|
||||
__m512h __B,
|
||||
__m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
|
||||
-(__v32hf)__C, (__mmask32)-1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
|
||||
-(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_maskz(
|
||||
(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ph(__m512h __A,
|
||||
__m512h __B,
|
||||
__m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)-1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ph(__m512h __A,
|
||||
__m512h __B,
|
||||
__m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
|
||||
-(__v32hf)__C, (__mmask32)-1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_maskz(
|
||||
-(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm512_fmaddsub_round_ph(A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddsubph512_mask3( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_fmsubadd_round_ph(A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
|
||||
(__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
|
||||
(__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
|
||||
return (__m512h)__builtin_ia32_vfmaddsubph512_mask3(
|
||||
(__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
|
||||
(__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
|
||||
(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
|
||||
(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
|
||||
(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
|
||||
((__m512h)__builtin_ia32_vfmsubph512_mask3( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
|
||||
return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
|
||||
((__m512h)__builtin_ia32_vfmsubaddph512_mask3( \
|
||||
(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
|
||||
return (__m512h)__builtin_ia32_vfmsubaddph512_mask3(
|
||||
(__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask( \
|
||||
(__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
|
||||
((__m512h)__builtin_ia32_vfmaddph512_mask( \
|
||||
(__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
#define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
|
||||
((__m512h)__builtin_ia32_vfmsubph512_mask3( \
|
||||
-(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
|
||||
(__mmask32)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
|
||||
return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
|
||||
-(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
|
||||
return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B,
|
||||
(__v32hf)__C, (__mmask32)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sh(__m128h __W,
|
||||
__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
|
||||
(__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sh(__m128h __W,
|
||||
__mmask8 __U,
|
||||
__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
|
||||
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_fmadd_round_sh(A, B, C, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask( \
|
||||
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm_mask_fmadd_round_sh(W, U, A, B, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask( \
|
||||
(__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, (__v8hf)__C,
|
||||
(__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
|
||||
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_mask3_fmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
|
||||
return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
|
||||
(__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_mask3_fmadd_round_sh(W, X, Y, U, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
|
||||
(__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_sh(__m128h __W,
|
||||
__m128h __A,
|
||||
__m128h __B) {
|
||||
return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
|
||||
-(__v8hf)__B, (__mmask8)-1,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sh(__m128h __W,
|
||||
__mmask8 __U,
|
||||
__m128h __A,
|
||||
__m128h __B) {
|
||||
return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
|
||||
-(__v8hf)__B, (__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_fmsub_round_sh(A, B, C, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask( \
|
||||
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm_mask_fmsub_round_sh(W, U, A, B, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask( \
|
||||
(__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
return (__m128h)__builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B,
|
||||
-(__v8hf)__C, (__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
|
||||
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
|
||||
(__mmask8)(U), (int)R))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_mask3_fmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
|
||||
return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
|
||||
(__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_mask3_fmsub_round_sh(W, X, Y, U, R) \
|
||||
((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
|
||||
(__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_sh(__m128h __W,
|
||||
__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
|
||||
(__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
|
||||
return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
|
||||
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_fnmadd_round_sh(A, B, C, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask( \
|
||||
(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm_mask_fnmadd_round_sh(W, U, A, B, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask( \
|
||||
(__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C,
|
||||
(__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
|
||||
(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_mask3_fnmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
|
||||
return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
|
||||
(__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_mask3_fnmadd_round_sh(W, X, Y, U, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
|
||||
(__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_sh(__m128h __W,
|
||||
__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
|
||||
(__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
|
||||
return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
|
||||
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_fnmsub_round_sh(A, B, C, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask( \
|
||||
(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm_mask_fnmsub_round_sh(W, U, A, B, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_mask( \
|
||||
(__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C,
|
||||
(__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
|
||||
((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
|
||||
(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128
|
||||
_mm_mask3_fnmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
|
||||
return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
|
||||
(__mmask8)__U,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
#define _mm_mask3_fnmsub_round_sh(W, X, Y, U, R) \
|
||||
((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
|
||||
(__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
|
||||
(__mmask8)(U), (int)(R)))
|
||||
|
||||
static __inline__ _Float16 __DEFAULT_FN_ATTRS512
|
||||
_mm512_reduce_add_ph(__m512h __W) {
|
||||
return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W);
|
||||
|
|
|
@ -1371,6 +1371,378 @@ _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
|
|||
(__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
|
||||
}
|
||||
|
||||
// 128-bit (8 x _Float16) fused multiply-add family. Every flavor funnels
// into the single vfmaddph builtin: subtract variants negate the addend
// operand, "fn" (negated-product) variants negate one multiplicand, and
// fnmsub negates both a multiplicand and the addend. Masked forms wrap the
// result in __builtin_ia32_selectph_128, which blends per lane between the
// FMA result and a fallback vector:
//   mask  -> fallback is the first source operand (__A)
//   mask3 -> fallback is the addend operand (__C)
//   maskz -> fallback is zero

// A*B + C for all lanes.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
                                                             __m128h __B,
                                                             __m128h __C) {
  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
                                          (__v8hf)__C);
}

// A*B + C; lanes with a clear mask bit keep the value from __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
                                                                  __mmask8 __U,
                                                                  __m128h __B,
                                                                  __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)__A);
}

// A*B + C; masked-off lanes keep the value from __C.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)__C);
}

// A*B + C; masked-off lanes are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
}

// A*B - C, expressed as fmadd with a negated addend.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
                                                             __m128h __B,
                                                             __m128h __C) {
  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
                                          -(__v8hf)__C);
}

// A*B - C; masked-off lanes keep __A. (Reuses _mm_fmsub_ph for the math.)
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
                                                                  __mmask8 __U,
                                                                  __m128h __B,
                                                                  __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)__A);
}

// A*B - C; masked-off lanes are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
}

// -(A*B) + C (multiplicand __A negated); masked-off lanes keep __C.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)__C);
}

// -(A*B) + C; masked-off lanes are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
}

// -(A*B) - C; masked-off lanes are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
}
|
||||
|
||||
// 256-bit (16 x _Float16) fused multiply-add family — the __m256h analogue
// of the 128-bit group above, built on vfmaddph256/selectph_256 with a
// 16-bit lane mask. Same negation scheme: fmsub negates __C, fnmadd negates
// __A, fnmsub negates both; mask/mask3/maskz fall back to __A/__C/zero.

// A*B + C for all lanes.
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
                                                                __m256h __B,
                                                                __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
                                             (__v16hf)__C);
}

// A*B + C; masked-off lanes keep __A.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)__A);
}

// A*B + C; masked-off lanes keep __C.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)__C);
}

// A*B + C; masked-off lanes are zeroed.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
}

// A*B - C (addend negated).
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
                                                                __m256h __B,
                                                                __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
                                             -(__v16hf)__C);
}

// A*B - C; masked-off lanes keep __A.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)__A);
}

// A*B - C; masked-off lanes are zeroed.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
}

// -(A*B) + C; masked-off lanes keep __C.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)__C);
}

// -(A*B) + C; masked-off lanes are zeroed.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
}

// -(A*B) - C; masked-off lanes are zeroed.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
}
|
||||
|
||||
// 128-bit alternating multiply-add family, built on the vfmaddsubph builtin
// (alternates subtract/add across lanes). fmsubadd — the opposite
// alternation — is obtained by negating the __C operand before the same
// builtin. Masked forms blend with __A (mask), __C (mask3), or zero (maskz)
// via __builtin_ia32_selectph_128.

// Alternating A*B -/+ C for all lanes.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A,
                                                                __m128h __B,
                                                                __m128h __C) {
  return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
                                             (__v8hf)__C);
}

// fmaddsub; masked-off lanes keep __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)__A);
}

// fmaddsub; masked-off lanes keep __C.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)__C);
}

// fmaddsub; masked-off lanes are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
}

// Alternating A*B +/- C, via fmaddsub with a negated addend.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A,
                                                                __m128h __B,
                                                                __m128h __C) {
  return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
                                             -(__v8hf)__C);
}

// fmsubadd; masked-off lanes keep __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
      (__v8hf)__A);
}

// fmsubadd; masked-off lanes are zeroed.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
      (__v8hf)_mm_setzero_ph());
}
|
||||
|
||||
// 256-bit alternating multiply-add family — the __m256h analogue of the
// fmaddsub/fmsubadd group above, using vfmaddsubph256, selectph_256, and a
// 16-bit lane mask. fmsubadd negates __C before the builtin.

// Alternating A*B -/+ C for all lanes.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
                                                (__v16hf)__C);
}

// fmaddsub; masked-off lanes keep __A.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)__A);
}

// fmaddsub; masked-off lanes keep __C.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)__C);
}

// fmaddsub; masked-off lanes are zeroed.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
}

// Alternating A*B +/- C, via fmaddsub with a negated addend.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
                                                -(__v16hf)__C);
}

// fmsubadd; masked-off lanes keep __A.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)__A);
}

// fmsubadd; masked-off lanes are zeroed.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)_mm256_setzero_ph());
}
|
||||
|
||||
// mask3 (merge-into-__C) forms of fmsub and fmsubadd, 128- and 256-bit.
// The subtraction is expressed by negating __C before the fmadd/fmaddsub
// builtin; lanes with a clear mask bit keep the original __C value.

// A*B - C; masked-off lanes keep __C.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
      (__v8hf)__C);
}

// 256-bit A*B - C; masked-off lanes keep __C.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)__C);
}

// Alternating A*B +/- C; masked-off lanes keep __C.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
      (__v8hf)__C);
}

// 256-bit alternating A*B +/- C; masked-off lanes keep __C.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
      (__v16hf)__C);
}
|
||||
|
||||
// Negated-product (fnmadd/fnmsub) intrinsics, 128- and 256-bit. Here the
// product is negated by negating the second multiplicand (__B) — unlike the
// mask3/maskz fnmadd forms earlier in this header, which negate __A; both
// spellings compute -(A*B). fnmsub additionally negates the addend __C.
// Masked forms blend against __A (mask) or __C (mask3) with selectph.

// -(A*B) + C for all lanes.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
                                                              __m128h __B,
                                                              __m128h __C) {
  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
                                          (__v8hf)__C);
}

// -(A*B) + C; masked-off lanes keep __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
      (__v8hf)__A);
}

// 256-bit -(A*B) + C for all lanes.
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
                                                                 __m256h __B,
                                                                 __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
                                             (__v16hf)__C);
}

// 256-bit -(A*B) + C; masked-off lanes keep __A.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
      (__v16hf)__A);
}

// -(A*B) - C for all lanes.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
                                                              __m128h __B,
                                                              __m128h __C) {
  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
                                          -(__v8hf)__C);
}

// -(A*B) - C; masked-off lanes keep __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
      (__v8hf)__A);
}

// -(A*B) - C; masked-off lanes keep __C.
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
  return (__m128h)__builtin_ia32_selectph_128(
      (__mmask8)__U,
      __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
      (__v8hf)__C);
}

// 256-bit -(A*B) - C for all lanes.
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
                                                                 __m256h __B,
                                                                 __m256h __C) {
  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
                                             -(__v16hf)__C);
}

// 256-bit -(A*B) - C; masked-off lanes keep __A.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
      (__v16hf)__A);
}

// 256-bit -(A*B) - C; masked-off lanes keep __C.
static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
  return (__m256h)__builtin_ia32_selectph_256(
      (__mmask16)__U,
      __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
      (__v16hf)__C);
}
|
||||
|
||||
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
|
||||
__m128h __A,
|
||||
__m128h __W) {
|
||||
|
|
|
@ -4084,6 +4084,9 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
|
|||
case X86::BI__builtin_ia32_vfmaddss3_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddss3_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddss3_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddsh3_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddsh3_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddsh3_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddpd512_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddpd512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
|
||||
|
@ -4092,6 +4095,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
|
|||
case X86::BI__builtin_ia32_vfmaddps512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddps512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubps512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddph512_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddph512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddph512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubph512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
|
||||
|
@ -4100,6 +4107,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
|
|||
case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmaddsubph512_mask:
|
||||
case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
|
||||
case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
|
||||
case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
|
||||
ArgNum = 4;
|
||||
HasRC = true;
|
||||
break;
|
||||
|
|
|
@ -3163,6 +3163,839 @@ __m256h test_mm512_maskz_cvtxps_ph(__mmask16 A, __m512 B) {
|
|||
return _mm512_maskz_cvtxps_ph(A, B);
|
||||
}
|
||||
|
||||
// CodeGen tests for the 512-bit FP16 FMA intrinsics with an explicit
// rounding argument. Every *_round_* form must lower to the target
// intrinsic llvm.x86.avx512fp16.vfmadd.ph.512 (which carries the rounding
// immediate). Negated flavors additionally emit fneg on each negated
// operand, and masked forms emit an i32->mask bitcast plus a vector select
// (against zeroinitializer for the maskz variants).

__m512h test_mm512_fmadd_round_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fmadd_round_ph
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  return _mm512_fmadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_mask_fmadd_round_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fmadd_round_ph
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fmadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_mask3_fmadd_round_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fmadd_round_ph
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fmadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_maskz_fmadd_round_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fmadd_round_ph
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fmadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_fmsub_round_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fmsub_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  return _mm512_fmsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_mask_fmsub_round_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fmsub_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fmsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_maskz_fmsub_round_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fmsub_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fmsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_fnmadd_round_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fnmadd_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  return _mm512_fnmadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_mask3_fnmadd_round_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fnmadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_maskz_fnmadd_round_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fnmadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_fnmsub_round_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fnmsub_round_ph
  // CHECK: fneg
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  return _mm512_fnmsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_maskz_fnmsub_round_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_ph
  // CHECK: fneg
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fnmsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// CodeGen tests for the 512-bit FP16 FMA intrinsics without a rounding
// argument. These lower to the generic llvm.fma.v32f16 intrinsic rather
// than the target-specific one; negated flavors emit fneg on each negated
// operand, and masked forms emit the mask bitcast plus a vector select.

__m512h test_mm512_fmadd_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fmadd_ph
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  return _mm512_fmadd_ph(__A, __B, __C);
}

__m512h test_mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fmadd_ph
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  return _mm512_mask_fmadd_ph(__A, __U, __B, __C);
}

__m512h test_mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fmadd_ph
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fmadd_ph(__A, __B, __C, __U);
}

__m512h test_mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fmadd_ph
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fmadd_ph(__U, __A, __B, __C);
}

__m512h test_mm512_fmsub_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fmsub_ph
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  return _mm512_fmsub_ph(__A, __B, __C);
}

__m512h test_mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fmsub_ph
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fmsub_ph(__A, __U, __B, __C);
}

__m512h test_mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fmsub_ph
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fmsub_ph(__U, __A, __B, __C);
}

__m512h test_mm512_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fnmadd_ph
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  return _mm512_fnmadd_ph(__A, __B, __C);
}

__m512h test_mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fnmadd_ph
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fnmadd_ph(__A, __B, __C, __U);
}

__m512h test_mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fnmadd_ph
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fnmadd_ph(__U, __A, __B, __C);
}

__m512h test_mm512_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fnmsub_ph
  // CHECK: fneg
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  return _mm512_fnmsub_ph(__A, __B, __C);
}

__m512h test_mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fnmsub_ph
  // CHECK: fneg
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fnmsub_ph(__U, __A, __B, __C);
}
|
||||
|
||||
// CodeGen tests for the 512-bit alternating FMA (fmaddsub/fmsubadd)
// intrinsics with an explicit rounding argument. These always lower to
// llvm.x86.avx512fp16.vfmaddsub.ph.512; fmsubadd additionally emits fneg
// for the negated addend, and masked forms emit the mask bitcast + select.

__m512h test_mm512_fmaddsub_round_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fmaddsub_round_ph
  // CHECK: @llvm.x86.avx512fp16.vfmaddsub.ph.512
  return _mm512_fmaddsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_mask_fmaddsub_round_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_ph
  // CHECK: @llvm.x86.avx512fp16.vfmaddsub.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fmaddsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_mask3_fmaddsub_round_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_ph
  // CHECK: @llvm.x86.avx512fp16.vfmaddsub.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fmaddsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_maskz_fmaddsub_round_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_ph
  // CHECK: @llvm.x86.avx512fp16.vfmaddsub.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fmaddsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_fmsubadd_round_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fmsubadd_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmaddsub.ph.512
  return _mm512_fmsubadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_mask_fmsubadd_round_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmaddsub.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fmsubadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

__m512h test_mm512_maskz_fmsubadd_round_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmaddsub.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fmsubadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// CodeGen tests for the 512-bit alternating FMA intrinsics without
// rounding. They still lower to llvm.x86.avx512fp16.vfmaddsub.ph.512 with
// the default rounding immediate (i32 4). The fmaddsub forms must emit no
// fneg at all (CHECK-NOT), while fmsubadd must feed a captured fneg result
// into the addend operand of the call.

__m512h test_mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fmaddsub_ph
  // CHECK-NOT: fneg
  // CHECK: call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}, i32 4)
  return _mm512_fmaddsub_ph(__A, __B, __C);
}

__m512h test_mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fmaddsub_ph
  // CHECK-NOT: fneg
  // CHECK: call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}, i32 4)
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fmaddsub_ph(__A, __U, __B, __C);
}

__m512h test_mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_ph
  // CHECK-NOT: fneg
  // CHECK: call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}, i32 4)
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fmaddsub_ph(__A, __B, __C, __U);
}

__m512h test_mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_ph
  // CHECK-NOT: fneg
  // CHECK: call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}, i32 4)
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fmaddsub_ph(__U, __A, __B, __C);
}

__m512h test_mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_fmsubadd_ph
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> [[NEG]], i32 4)
  return _mm512_fmsubadd_ph(__A, __B, __C);
}

__m512h test_mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fmsubadd_ph
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> [[NEG]], i32 4)
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fmsubadd_ph(__A, __U, __B, __C);
}

__m512h test_mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_ph
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> [[NEG]], i32 4)
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> zeroinitializer
  return _mm512_maskz_fmsubadd_ph(__U, __A, __B, __C);
}
|
||||
|
||||
// mask3 variants merge into the third operand (C). With an explicit rounding
// mode the target-specific vfmadd intrinsic is used instead of llvm.fma.
__m512h test_mm512_mask3_fmsub_round_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fmsub_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fmsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// Default rounding: FMSUB lowers to a negated addend plus the generic
// llvm.fma intrinsic.
__m512h test_mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fmsub_ph
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fmsub_ph(__A, __B, __C, __U);
}

// FMSUBADD with rounding: negated addend feeding vfmaddsub.
__m512h test_mm512_mask3_fmsubadd_round_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmaddsub.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fmsubadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// FMSUBADD, default rounding (i32 4 = CUR_DIRECTION): the fneg result is
// pinned as the third vfmaddsub operand.
__m512h test_mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_ph
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> [[NEG]], i32 4)
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fmsubadd_ph(__A, __B, __C, __U);
}
|
||||
|
||||
// FNMADD: one fneg (on a multiplicand) before the FMA; merge-masking selects
// against the passthrough operand.
__m512h test_mm512_mask_fnmadd_round_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fnmadd_round_ph
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fnmadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// Default-rounding FNMADD lowers to the generic llvm.fma intrinsic.
__m512h test_mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fnmadd_ph
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fnmadd_ph(__A, __U, __B, __C);
}
|
||||
|
||||
// FNMSUB: two fnegs (multiplicand and addend) ahead of the FMA.
__m512h test_mm512_mask_fnmsub_round_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fnmsub_round_ph
  // CHECK: fneg
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fnmsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// mask3 (merge into C) with explicit rounding.
__m512h test_mm512_mask3_fnmsub_round_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_ph
  // CHECK: fneg
  // CHECK: fneg
  // CHECK: @llvm.x86.avx512fp16.vfmadd.ph.512
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fnmsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// Default rounding: generic llvm.fma after the two fnegs.
__m512h test_mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
  // CHECK-LABEL: @test_mm512_mask_fnmsub_ph
  // CHECK: fneg
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask_fnmsub_ph(__A, __U, __B, __C);
}

__m512h test_mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
  // CHECK-LABEL: @test_mm512_mask3_fnmsub_ph
  // CHECK: fneg
  // CHECK: fneg
  // CHECK: call <32 x half> @llvm.fma.v32f16(<32 x half> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}})
  // CHECK: bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}}
  return _mm512_mask3_fnmsub_ph(__A, __B, __C, __U);
}
|
||||
|
||||
// Scalar FP16 FMA: element 0 of each vector is extracted, the scalar FMA is
// done on halves, and the result is reinserted into the first source (A).
__m128h test_mm_fmadd_sh(__m128h __W, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_fmadd_sh
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  return _mm_fmadd_sh(__W, __A, __B);
}

// Merge-masking: a scalar i1 select between the FMA result and the original
// element 0 of A, reinserted into the original A vector.
__m128h test_mm_mask_fmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_mask_fmadd_sh
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[A]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_mask_fmadd_sh(__W, __U, __A, __B);
}

// Explicit rounding uses the target-specific scalar intrinsic
// (i32 11 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC).
__m128h test_mm_fmadd_round_sh(__m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_fmadd_round_sh
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[FMA]], i64 0
  return _mm_fmadd_round_sh(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// Merge-masking with explicit rounding: target scalar intrinsic + i1 select
// against element 0 of A.
__m128h test_mm_mask_fmadd_round_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_mask_fmadd_round_sh
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[A]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_mask_fmadd_round_sh(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// Zero-masking: the scalar select falls back to half 0.0 (0xH0000).
__m128h test_mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_maskz_fmadd_sh
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half 0xH0000
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_maskz_fmadd_sh(__U, __A, __B, __C);
}

// Zero-masking + rounding variant.
__m128h test_mm_maskz_fmadd_round_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_maskz_fmadd_round_sh
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half 0xH0000
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_maskz_fmadd_round_sh(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// mask3 scalar FMA: the merge source is the third vector (Y/C), so the
// select falls back to [[C]] and the result is inserted into [[ORIGC]].
__m128h test_mm_mask3_fmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
  // CHECK-LABEL: @test_mm_mask3_fmadd_sh
  // CHECK: [[A:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> [[ORIGC:%.+]], i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[C]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGC]], half [[SEL]], i64 0
  return _mm_mask3_fmadd_sh(__W, __X, __Y, __U);
}

// Same, with explicit rounding via the target scalar intrinsic.
__m128h test_mm_mask3_fmadd_round_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
  // CHECK-LABEL: @test_mm_mask3_fmadd_round_sh
  // CHECK: [[A:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> [[ORIGC:%.+]], i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[C]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGC]], half [[SEL]], i64 0
  return _mm_mask3_fmadd_round_sh(__W, __X, __Y, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// Scalar FMSUB: the whole addend vector is fneg'd first, then the usual
// extract / scalar-FMA / insert sequence follows.
__m128h test_mm_fmsub_sh(__m128h __W, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_fmsub_sh
  // CHECK: %{{.*}} = fneg <8 x half> %{{.*}}
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = call half @llvm.fma.f16(half %{{.*}}, half %{{.*}}, half %{{.*}})
  // CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
  // CHECK-NEXT: ret <8 x half> %{{.*}}
  return _mm_fmsub_sh(__W, __A, __B);
}

// Merge-masking FMSUB: fneg, scalar FMA, then an i1 select before reinsert.
__m128h test_mm_mask_fmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_mask_fmsub_sh
  // CHECK: %{{.*}} = fneg <8 x half> %{{.*}}
  // CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = call half @llvm.fma.f16(half %{{.*}}, half %{{.*}}, half %{{.*}})
  // CHECK-NEXT: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: %{{.*}} = extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = select i1 %{{.*}}, half %{{.*}}, half %{{.*}}
  // CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
  // CHECK-NEXT: ret <8 x half> %{{.*}}
  return _mm_mask_fmsub_sh(__W, __U, __A, __B);
}

// Rounding variant uses the target scalar intrinsic (i32 11).
__m128h test_mm_fmsub_round_sh(__m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_fmsub_round_sh
  // CHECK: %{{.*}} = fneg <8 x half> %{{.*}}
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = call half @llvm.x86.avx512fp16.vfmadd.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}, i32 11)
  // CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
  // CHECK-NEXT: ret <8 x half> %{{.*}}
  return _mm_fmsub_round_sh(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// Merge-masking FMSUB with rounding.
__m128h test_mm_mask_fmsub_round_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_mask_fmsub_round_sh
  // CHECK: %{{.*}} = fneg <8 x half> %{{.*}}
  // CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = call half @llvm.x86.avx512fp16.vfmadd.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}, i32 11)
  // CHECK-NEXT: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: %{{.*}} = extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = select i1 %{{.*}}, half %{{.*}}, half %{{.*}}
  // CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
  // CHECK-NEXT: ret <8 x half> %{{.*}}
  return _mm_mask_fmsub_round_sh(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// Zero-masking FMSUB: select against half 0.0 (0xH0000).
__m128h test_mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_maskz_fmsub_sh
  // CHECK: %{{.*}} = fneg <8 x half> %{{.*}}
  // CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = call half @llvm.fma.f16(half %{{.*}}, half %{{.*}}, half %{{.*}})
  // CHECK-NEXT: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: %{{.*}} = extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = select i1 %{{.*}}, half %{{.*}}, half 0xH0000
  // CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
  // CHECK-NEXT: ret <8 x half> %{{.*}}
  return _mm_maskz_fmsub_sh(__U, __A, __B, __C);
}

// Zero-masking + rounding variant.
__m128h test_mm_maskz_fmsub_round_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_maskz_fmsub_round_sh
  // CHECK: %{{.*}} = fneg <8 x half> %{{.*}}
  // CHECK: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = call half @llvm.x86.avx512fp16.vfmadd.f16(half %{{.*}}, half %{{.*}}, half %{{.*}}, i32 11)
  // CHECK-NEXT: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: %{{.*}} = extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: %{{.*}} = select i1 %{{.*}}, half %{{.*}}, half 0xH0000
  // CHECK-NEXT: %{{.*}} = insertelement <8 x half> %{{.*}}, half %{{.*}}, i64 0
  // CHECK-NEXT: ret <8 x half> %{{.*}}
  return _mm_maskz_fmsub_round_sh(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// mask3 FMSUB: because C was fneg'd for the FMA, element 0 of the ORIGINAL
// (un-negated) C is re-extracted as [[C2]] for the masked merge.
__m128h test_mm_mask3_fmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
  // CHECK-LABEL: @test_mm_mask3_fmsub_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: [[C2:%.+]] = extractelement <8 x half> [[ORIGC:%.+]], i64 0
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[C2]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGC]], half [[SEL]], i64 0
  return _mm_mask3_fmsub_sh(__W, __X, __Y, __U);
}

// Same with explicit rounding (target scalar intrinsic, i32 11).
__m128h test_mm_mask3_fmsub_round_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
  // CHECK-LABEL: @test_mm_mask3_fmsub_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: [[C2:%.+]] = extractelement <8 x half> [[ORIGC:%.+]], i64 0
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[C2]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGC]], half [[SEL]], i64 0
  return _mm_mask3_fmsub_round_sh(__W, __X, __Y, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// Scalar FNMADD: one fneg before the extract/FMA sequence.
__m128h test_mm_fnmadd_sh(__m128h __W, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_fnmadd_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  return _mm_fnmadd_sh(__W, __A, __B);
}

// Merge-masking: select against element 0 of A, reinsert into ORIGA.
__m128h test_mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_mask_fnmadd_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[A]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_mask_fnmadd_sh(__W, __U, __A, __B);
}

// Rounding variant via the target scalar intrinsic.
__m128h test_mm_fnmadd_round_sh(__m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_fnmadd_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[FMA]], i64 0
  return _mm_fnmadd_round_sh(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// Merge-masking FNMADD with rounding.
__m128h test_mm_mask_fnmadd_round_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_mask_fnmadd_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[A]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_mask_fnmadd_round_sh(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// Zero-masking FNMADD: select against half 0.0.
__m128h test_mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_maskz_fnmadd_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half 0xH0000
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_maskz_fnmadd_sh(__U, __A, __B, __C);
}

// Zero-masking + rounding variant.
__m128h test_mm_maskz_fnmadd_round_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_maskz_fnmadd_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half 0xH0000
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_maskz_fnmadd_round_sh(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// mask3 FNMADD: merge source is C (third vector); note the select fallback
// is [[C]] itself since the addend is not negated for FNMADD.
__m128h test_mm_mask3_fnmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
  // CHECK-LABEL: @test_mm_mask3_fnmadd_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> [[ORIGC:%.+]], i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[C]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGC]], half [[SEL]], i64 0
  return _mm_mask3_fnmadd_sh(__W, __X, __Y, __U);
}

// Same with explicit rounding.
__m128h test_mm_mask3_fnmadd_round_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
  // CHECK-LABEL: @test_mm_mask3_fnmadd_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> [[ORIGC:%.+]], i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[C]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGC]], half [[SEL]], i64 0
  return _mm_mask3_fnmadd_round_sh(__W, __X, __Y, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// Scalar FNMSUB: two fnegs (multiplicand and addend) before the FMA.
__m128h test_mm_fnmsub_sh(__m128h __W, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_fnmsub_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[NEG2:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  return _mm_fnmsub_sh(__W, __A, __B);
}

// Merge-masking FNMSUB: select against element 0 of A, reinsert into ORIGA.
__m128h test_mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_mask_fnmsub_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[NEG2:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[A]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_mask_fnmsub_sh(__W, __U, __A, __B);
}

// Rounding variant via the target scalar intrinsic.
__m128h test_mm_fnmsub_round_sh(__m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_fnmsub_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[NEG2:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[FMA]], i64 0
  return _mm_fnmsub_round_sh(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// Merge-masking FNMSUB with rounding.
__m128h test_mm_mask_fnmsub_round_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  // CHECK-LABEL: @test_mm_mask_fnmsub_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[NEG2:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[A]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_mask_fnmsub_round_sh(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

// Zero-masking FNMSUB: select against half 0.0.
__m128h test_mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_maskz_fnmsub_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[NEG2:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half 0xH0000
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_maskz_fnmsub_sh(__U, __A, __B, __C);
}

// Zero-masking + rounding variant.
__m128h test_mm_maskz_fnmsub_round_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
  // CHECK-LABEL: @test_mm_maskz_fnmsub_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[NEG2:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> [[ORIGA:%.+]], i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half 0xH0000
  // CHECK-NEXT: insertelement <8 x half> [[ORIGA]], half [[SEL]], i64 0
  return _mm_maskz_fnmsub_round_sh(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
// mask3 FNMSUB: as with mask3 FMSUB, element 0 of the original (un-negated)
// C is re-extracted as [[C2]] for the masked merge into [[ORIGC]].
__m128h test_mm_mask3_fnmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
  // CHECK-LABEL: @test_mm_mask3_fnmsub_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[NEG2:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.fma.f16(half [[A]], half [[B]], half [[C]])
  // CHECK-NEXT: [[C2:%.+]] = extractelement <8 x half> [[ORIGC:%.+]], i64 0
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[C2]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGC]], half [[SEL]], i64 0
  return _mm_mask3_fnmsub_sh(__W, __X, __Y, __U);
}

// Same with explicit rounding.
__m128h test_mm_mask3_fnmsub_round_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
  // CHECK-LABEL: @test_mm_mask3_fnmsub_round_sh
  // CHECK: [[NEG:%.+]] = fneg
  // CHECK: [[NEG2:%.+]] = fneg
  // CHECK: [[A:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[B:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[C:%.+]] = extractelement <8 x half> %{{.*}}, i64 0
  // CHECK-NEXT: [[FMA:%.+]] = call half @llvm.x86.avx512fp16.vfmadd.f16(half [[A]], half [[B]], half [[C]], i32 11)
  // CHECK-NEXT: [[C2:%.+]] = extractelement <8 x half> [[ORIGC:%.+]], i64 0
  // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
  // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
  // CHECK-NEXT: [[SEL:%.+]] = select i1 %{{.*}}, half [[FMA]], half [[C2]]
  // CHECK-NEXT: insertelement <8 x half> [[ORIGC]], half [[SEL]], i64 0
  return _mm_mask3_fnmsub_round_sh(__W, __X, __Y, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
|
||||
|
||||
_Float16 test_mm512_reduce_add_ph(__m512h __W) {
|
||||
// CHECK-LABEL: @test_mm512_reduce_add_ph
|
||||
// CHECK: call reassoc half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})
|
||||
|
|
|
@ -2321,6 +2321,411 @@ __m128h test_mm256_maskz_cvtxps_ph(__mmask8 A, __m256 B) {
|
|||
return _mm256_maskz_cvtxps_ph(A, B);
|
||||
}
|
||||
|
||||
__m128h test_mm_fmadd_ph(__m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_fmadd_ph
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
return _mm_fmadd_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask_fmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_mask_fmadd_ph
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask_fmadd_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_fmsub_ph(__m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_fmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
return _mm_fmsub_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask_fmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_mask_fmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask_fmsub_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
|
||||
// CHECK-LABEL: @test_mm_mask3_fmadd_ph
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask3_fmadd_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
|
||||
// CHECK-LABEL: @test_mm_mask3_fnmadd_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask3_fnmadd_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m128h test_mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_maskz_fmadd_ph
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_maskz_fmadd_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_maskz_fmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_maskz_fmsub_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_maskz_fnmadd_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_maskz_fnmadd_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_maskz_fnmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_maskz_fnmsub_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_fmadd_ph(__m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_fmadd_ph
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
return _mm256_fmadd_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask_fmadd_ph(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_mask_fmadd_ph
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask_fmadd_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_fmsub_ph(__m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_fmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
return _mm256_fmsub_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_mask_fmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask_fmsub_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
|
||||
// CHECK-LABEL: @test_mm256_mask3_fmadd_ph
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask3_fmadd_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
|
||||
// CHECK-LABEL: @test_mm256_mask3_fnmadd_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask3_fnmadd_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m256h test_mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_fmadd_ph
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_maskz_fmadd_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_fmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_maskz_fmsub_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_fnmadd_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_maskz_fnmadd_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_fnmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_maskz_fnmsub_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_fmaddsub_ph
|
||||
// CHECK-NOT: fneg
|
||||
// CHECK: call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
return _mm_fmaddsub_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_mask_fmaddsub_ph
|
||||
// CHECK-NOT: fneg
|
||||
// CHECK: call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask_fmaddsub_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_fmsubadd_ph
|
||||
// CHECK: [[NEG:%.+]] = fneg
|
||||
// CHECK: call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> [[NEG]])
|
||||
return _mm_fmsubadd_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_mask_fmsubadd_ph
|
||||
// CHECK: [[NEG:%.+]] = fneg
|
||||
// CHECK: call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> [[NEG]])
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask_fmsubadd_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
|
||||
// CHECK-LABEL: @test_mm_mask3_fmaddsub_ph
|
||||
// CHECK-NOT: fneg
|
||||
// CHECK: call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask3_fmaddsub_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m128h test_mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_maskz_fmaddsub_ph
|
||||
// CHECK-NOT: fneg
|
||||
// CHECK: call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_maskz_fmaddsub_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_maskz_fmsubadd_ph
|
||||
// CHECK: [[NEG:%.+]] = fneg
|
||||
// CHECK: call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> [[NEG]])
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_maskz_fmsubadd_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_fmaddsub_ph
|
||||
// CHECK-NOT: fneg
|
||||
// CHECK: call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
return _mm256_fmaddsub_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_mask_fmaddsub_ph
|
||||
// CHECK-NOT: fneg
|
||||
// CHECK: call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask_fmaddsub_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_fmsubadd_ph
|
||||
// CHECK: [[NEG:%.+]] = fneg
|
||||
// CHECK: call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> [[NEG]])
|
||||
return _mm256_fmsubadd_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_mask_fmsubadd_ph
|
||||
// CHECK: [[NEG:%.+]] = fneg
|
||||
// CHECK: call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> [[NEG]])
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask_fmsubadd_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
|
||||
// CHECK-LABEL: @test_mm256_mask3_fmaddsub_ph
|
||||
// CHECK-NOT: fneg
|
||||
// CHECK: call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask3_fmaddsub_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m256h test_mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_fmaddsub_ph
|
||||
// CHECK-NOT: fneg
|
||||
// CHECK: call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_maskz_fmaddsub_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_fmsubadd_ph
|
||||
// CHECK: [[NEG:%.+]] = fneg
|
||||
// CHECK: call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> [[NEG]])
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_maskz_fmsubadd_ph(__U, __A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
|
||||
// CHECK-LABEL: @test_mm_mask3_fmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask3_fmsub_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
|
||||
// CHECK-LABEL: @test_mm256_mask3_fmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask3_fmsub_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
|
||||
// CHECK-LABEL: @test_mm_mask3_fmsubadd_ph
|
||||
// CHECK: [[NEG:%.+]] = fneg
|
||||
// CHECK: call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> [[NEG]])
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask3_fmsubadd_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
|
||||
// CHECK-LABEL: @test_mm256_mask3_fmsubadd_ph
|
||||
// CHECK: [[NEG:%.+]] = fneg
|
||||
// CHECK: call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> [[NEG]])
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask3_fmsubadd_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m128h test_mm_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_fnmadd_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
return _mm_fnmadd_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_mask_fnmadd_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask_fnmadd_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_fnmadd_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
return _mm256_fnmadd_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_mask_fnmadd_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask_fnmadd_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_fnmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
return _mm_fnmsub_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
|
||||
// CHECK-LABEL: @test_mm_mask_fnmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask_fnmsub_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m128h test_mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
|
||||
// CHECK-LABEL: @test_mm_mask3_fnmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: fneg
|
||||
// CHECK: call <8 x half> @llvm.fma.v8f16(<8 x half> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}})
|
||||
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
|
||||
return _mm_mask3_fnmsub_ph(__A, __B, __C, __U);
|
||||
}
|
||||
|
||||
__m256h test_mm256_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_fnmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
return _mm256_fnmsub_ph(__A, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
|
||||
// CHECK-LABEL: @test_mm256_mask_fnmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
|
||||
return _mm256_mask_fnmsub_ph(__A, __U, __B, __C);
|
||||
}
|
||||
|
||||
__m256h test_mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
|
||||
// CHECK-LABEL: @test_mm256_mask3_fnmsub_ph
|
||||
// CHECK: fneg
|
||||
// CHECK: fneg
|
||||
// CHECK: call <16 x half> @llvm.fma.v16f16(<16 x half> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}})
|
||||
return _mm256_mask3_fnmsub_ph(__A, __B, __C, __U);
|
||||
}
|
||||
__m128h test_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
|
||||
// CHECK-LABEL: @test_mm_mask_blend_ph
|
||||
// CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
|
||||
|
|
|
@ -5709,4 +5709,27 @@ let TargetPrefix = "x86" in {
|
|||
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
|
||||
llvm_i32_ty ],
|
||||
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
|
||||
|
||||
def int_x86_avx512fp16_vfmadd_ph_512
|
||||
: Intrinsic<[ llvm_v32f16_ty ],
|
||||
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
|
||||
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
|
||||
def int_x86_avx512fp16_vfmaddsub_ph_128
|
||||
: GCCBuiltin<"__builtin_ia32_vfmaddsubph">,
|
||||
Intrinsic<[ llvm_v8f16_ty ],
|
||||
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ],
|
||||
[ IntrNoMem ]>;
|
||||
def int_x86_avx512fp16_vfmaddsub_ph_256
|
||||
: GCCBuiltin<"__builtin_ia32_vfmaddsubph256">,
|
||||
Intrinsic<[ llvm_v16f16_ty ],
|
||||
[ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty ],
|
||||
[ IntrNoMem ]>;
|
||||
def int_x86_avx512fp16_vfmaddsub_ph_512
|
||||
: Intrinsic<[ llvm_v32f16_ty ],
|
||||
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
|
||||
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
|
||||
def int_x86_avx512fp16_vfmadd_f16
|
||||
: Intrinsic<[ llvm_half_ty ],
|
||||
[ llvm_half_ty, llvm_half_ty, llvm_half_ty, llvm_i32_ty ],
|
||||
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
|
||||
}
|
||||
|
|
|
@ -1934,6 +1934,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::LOAD, VT, Legal);
|
||||
setOperationAction(ISD::STORE, VT, Legal);
|
||||
|
||||
setOperationAction(ISD::FMA, VT, Legal);
|
||||
setOperationAction(ISD::STRICT_FMA, VT, Legal);
|
||||
setOperationAction(ISD::VSELECT, VT, Legal);
|
||||
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
|
@ -32720,6 +32722,8 @@ bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
|||
return false;
|
||||
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
case MVT::f16:
|
||||
return Subtarget.hasFP16();
|
||||
case MVT::f32:
|
||||
case MVT::f64:
|
||||
return true;
|
||||
|
@ -49021,7 +49025,9 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
EVT ScalarVT = VT.getScalarType();
|
||||
if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
|
||||
if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
|
||||
!Subtarget.hasAnyFMA()) &&
|
||||
!(ScalarVT == MVT::f16 && Subtarget.hasFP16()))
|
||||
return SDValue();
|
||||
|
||||
auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
|
||||
|
|
|
@ -6760,14 +6760,14 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
|
|||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
|
||||
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
|
||||
AVX512FMA3Base, Sched<[sched]>;
|
||||
EVEX_4V, Sched<[sched]>;
|
||||
|
||||
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
|
||||
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
|
||||
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
|
||||
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3),
|
||||
|
@ -6777,7 +6777,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
|
|||
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
|
||||
(MaskOpNode _.RC:$src2,
|
||||
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
|
||||
AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6791,21 +6791,22 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
|
||||
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
|
||||
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
|
||||
AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
|
||||
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
|
||||
}
|
||||
|
||||
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
|
||||
SDNode MaskOpNode, SDNode OpNodeRnd,
|
||||
X86SchedWriteWidths sched,
|
||||
AVX512VLVectorVTInfo _, string Suff> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
AVX512VLVectorVTInfo _, string Suff,
|
||||
Predicate prd = HasAVX512> {
|
||||
let Predicates = [prd] in {
|
||||
defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
|
||||
sched.ZMM, _.info512, Suff>,
|
||||
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
|
||||
_.info512, Suff>,
|
||||
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
|
||||
}
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
let Predicates = [HasVLX, prd] in {
|
||||
defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
|
||||
sched.YMM, _.info256, Suff>,
|
||||
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
|
||||
|
@ -6817,12 +6818,15 @@ multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOpera
|
|||
|
||||
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
|
||||
SDNode MaskOpNode, SDNode OpNodeRnd> {
|
||||
defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f16_info, "PH", HasFP16>, T_MAP6PD;
|
||||
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f32_info, "PS">;
|
||||
avx512vl_f32_info, "PS">, T8PD;
|
||||
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f64_info, "PD">, VEX_W;
|
||||
avx512vl_f64_info, "PD">, T8PD, VEX_W;
|
||||
}
|
||||
|
||||
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
|
||||
|
@ -6849,14 +6853,14 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
|
|||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(null_frag),
|
||||
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
|
||||
AVX512FMA3Base, Sched<[sched]>;
|
||||
EVEX_4V, Sched<[sched]>;
|
||||
|
||||
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
|
||||
(_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
|
||||
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
|
||||
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3),
|
||||
|
@ -6867,7 +6871,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
|
|||
_.RC:$src1)),
|
||||
(_.VT (MaskOpNode _.RC:$src2,
|
||||
(_.VT (_.BroadcastLdFrag addr:$src3)),
|
||||
_.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
|
||||
_.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -6882,21 +6886,22 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
|
||||
(null_frag),
|
||||
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
|
||||
1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
|
||||
1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
|
||||
}
|
||||
|
||||
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
|
||||
SDNode MaskOpNode, SDNode OpNodeRnd,
|
||||
X86SchedWriteWidths sched,
|
||||
AVX512VLVectorVTInfo _, string Suff> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
AVX512VLVectorVTInfo _, string Suff,
|
||||
Predicate prd = HasAVX512> {
|
||||
let Predicates = [prd] in {
|
||||
defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
|
||||
sched.ZMM, _.info512, Suff>,
|
||||
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
|
||||
_.info512, Suff>,
|
||||
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
|
||||
}
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
let Predicates = [HasVLX, prd] in {
|
||||
defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
|
||||
sched.YMM, _.info256, Suff>,
|
||||
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
|
||||
|
@ -6908,12 +6913,15 @@ multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOpera
|
|||
|
||||
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
|
||||
SDNode MaskOpNode, SDNode OpNodeRnd > {
|
||||
defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f16_info, "PH", HasFP16>, T_MAP6PD;
|
||||
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f32_info, "PS">;
|
||||
avx512vl_f32_info, "PS">, T8PD;
|
||||
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f64_info, "PD">, VEX_W;
|
||||
avx512vl_f64_info, "PD">, T8PD, VEX_W;
|
||||
}
|
||||
|
||||
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
|
||||
|
@ -6939,7 +6947,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
|
|||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(null_frag),
|
||||
(_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
|
||||
AVX512FMA3Base, Sched<[sched]>;
|
||||
EVEX_4V, Sched<[sched]>;
|
||||
|
||||
// Pattern is 312 order so that the load is in a different place from the
|
||||
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
|
||||
|
@ -6948,7 +6956,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
|
|||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
|
||||
(_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
|
||||
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
|
||||
// Pattern is 312 order so that the load is in a different place from the
|
||||
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
|
||||
|
@ -6960,7 +6968,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
|
|||
_.RC:$src1, _.RC:$src2)),
|
||||
(_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
|
||||
_.RC:$src1, _.RC:$src2)), 1, 0>,
|
||||
AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6974,21 +6982,22 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
|
||||
(null_frag),
|
||||
(_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
|
||||
1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
|
||||
1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
|
||||
}
|
||||
|
||||
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
|
||||
SDNode MaskOpNode, SDNode OpNodeRnd,
|
||||
X86SchedWriteWidths sched,
|
||||
AVX512VLVectorVTInfo _, string Suff> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
AVX512VLVectorVTInfo _, string Suff,
|
||||
Predicate prd = HasAVX512> {
|
||||
let Predicates = [prd] in {
|
||||
defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
|
||||
sched.ZMM, _.info512, Suff>,
|
||||
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
|
||||
_.info512, Suff>,
|
||||
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
|
||||
}
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
let Predicates = [HasVLX, prd] in {
|
||||
defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
|
||||
sched.YMM, _.info256, Suff>,
|
||||
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
|
||||
|
@ -7000,12 +7009,15 @@ multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOpera
|
|||
|
||||
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
|
||||
SDNode MaskOpNode, SDNode OpNodeRnd > {
|
||||
defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f16_info, "PH", HasFP16>, T_MAP6PD;
|
||||
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f32_info, "PS">;
|
||||
avx512vl_f32_info, "PS">, T8PD;
|
||||
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
|
||||
OpNodeRnd, SchedWriteFMA,
|
||||
avx512vl_f64_info, "PD">, VEX_W;
|
||||
avx512vl_f64_info, "PD">, T8PD, VEX_W;
|
||||
}
|
||||
|
||||
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
|
||||
|
@ -7028,39 +7040,39 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
|
|||
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
|
||||
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
|
||||
AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
|
||||
EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
|
||||
|
||||
let mayLoad = 1 in
|
||||
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
|
||||
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
|
||||
AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
|
||||
EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
|
||||
|
||||
let Uses = [MXCSR] in
|
||||
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
|
||||
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
|
||||
AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
|
||||
EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
|
||||
|
||||
let isCodeGenOnly = 1, isCommutable = 1 in {
|
||||
def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
|
||||
def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
|
||||
!if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
|
||||
def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
|
||||
[RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
|
||||
|
||||
let Uses = [MXCSR] in
|
||||
def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
|
||||
!if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
|
||||
Sched<[SchedWriteFMA.Scl]>;
|
||||
Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
|
||||
}// isCodeGenOnly = 1
|
||||
}// Constraints = "$src1 = $dst"
|
||||
}
|
||||
|
@ -7104,10 +7116,15 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
|
|||
let Predicates = [HasAVX512] in {
|
||||
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
|
||||
OpNodeRnd, f32x_info, "SS">,
|
||||
EVEX_CD8<32, CD8VT1>, VEX_LIG;
|
||||
EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
|
||||
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
|
||||
OpNodeRnd, f64x_info, "SD">,
|
||||
EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
|
||||
EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
|
||||
}
|
||||
let Predicates = [HasFP16] in {
|
||||
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
|
||||
OpNodeRnd, f16x_info, "SH">,
|
||||
EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7119,8 +7136,9 @@ defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86Fnmsu
|
|||
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
|
||||
SDNode RndOp, string Prefix,
|
||||
string Suffix, SDNode Move,
|
||||
X86VectorVTInfo _, PatLeaf ZeroFP> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
X86VectorVTInfo _, PatLeaf ZeroFP,
|
||||
Predicate prd = HasAVX512> {
|
||||
let Predicates = [prd] in {
|
||||
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
|
||||
(Op _.FRC:$src2,
|
||||
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
|
||||
|
@ -7318,6 +7336,14 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
|
|||
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
|
||||
}
|
||||
}
|
||||
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
|
||||
X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
|
||||
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
|
||||
X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
|
||||
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
|
||||
X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
|
||||
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
|
||||
X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
|
||||
|
||||
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
|
||||
"SS", X86Movss, v4f32x_info, fp32imm0>;
|
||||
|
@ -7350,13 +7376,13 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(ins _.RC:$src2, _.RC:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
|
||||
AVX512FMA3Base, Sched<[sched]>;
|
||||
T8PD, EVEX_4V, Sched<[sched]>;
|
||||
|
||||
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
|
||||
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
|
||||
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3),
|
||||
|
@ -7365,7 +7391,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(OpNode _.RC:$src2,
|
||||
(_.VT (_.BroadcastLdFrag addr:$src3)),
|
||||
_.RC:$src1)>,
|
||||
AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
@ -12355,13 +12381,13 @@ multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
|
|||
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
|
||||
"$src3, $src2", "$src2, $src3",
|
||||
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
|
||||
AVX512FMA3Base, Sched<[sched]>;
|
||||
T8PD, EVEX_4V, Sched<[sched]>;
|
||||
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
|
||||
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
|
||||
"$src3, $src2", "$src2, $src3",
|
||||
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
|
||||
(VTI.VT (VTI.LdFrag addr:$src3))))>,
|
||||
AVX512FMA3Base,
|
||||
T8PD, EVEX_4V,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
}
|
||||
|
@ -12377,7 +12403,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
|
|||
"$src2, ${src3}"#VTI.BroadcastStr,
|
||||
(OpNode VTI.RC:$src1, VTI.RC:$src2,
|
||||
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
|
||||
AVX512FMA3Base, EVEX_B,
|
||||
T8PD, EVEX_4V, EVEX_B,
|
||||
Sched<[sched.Folded, sched.ReadAfterFold]>;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,35 +28,43 @@ using namespace llvm;
|
|||
FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
|
||||
FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
|
||||
|
||||
#define FMA3GROUP_PACKED_WIDTHS(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf##Ym, Attrs) \
|
||||
FMA3GROUP(Name, Suf##Yr, Attrs) \
|
||||
#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Zr, Attrs) \
|
||||
|
||||
#define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf##Ym, Attrs) \
|
||||
FMA3GROUP(Name, Suf##Yr, Attrs) \
|
||||
FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf##m, Attrs) \
|
||||
FMA3GROUP(Name, Suf##r, Attrs)
|
||||
|
||||
#define FMA3GROUP_PACKED(Name, Attrs) \
|
||||
FMA3GROUP_PACKED_WIDTHS(Name, PD, Attrs) \
|
||||
FMA3GROUP_PACKED_WIDTHS(Name, PS, Attrs)
|
||||
FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
|
||||
FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
|
||||
FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)
|
||||
|
||||
#define FMA3GROUP_SCALAR_WIDTHS(Name, Suf, Attrs) \
|
||||
#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf##Zm, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
|
||||
FMA3GROUP(Name, Suf##Zr, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
|
||||
|
||||
#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
|
||||
FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, Suf##m, Attrs) \
|
||||
FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
|
||||
FMA3GROUP(Name, Suf##r, Attrs) \
|
||||
FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
|
||||
|
||||
#define FMA3GROUP_SCALAR(Name, Attrs) \
|
||||
FMA3GROUP_SCALAR_WIDTHS(Name, SD, Attrs) \
|
||||
FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs)
|
||||
FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs) \
|
||||
FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs) \
|
||||
FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)
|
||||
|
||||
#define FMA3GROUP_FULL(Name, Attrs) \
|
||||
FMA3GROUP_PACKED(Name, Attrs) \
|
||||
|
@ -78,15 +86,19 @@ static const X86InstrFMA3Group Groups[] = {
|
|||
|
||||
#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
|
||||
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
|
||||
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
|
||||
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
|
||||
|
||||
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
|
||||
|
||||
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
|
||||
FMA3GROUP(Name, SDZ##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \
|
||||
FMA3GROUP(Name, SHZ##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \
|
||||
FMA3GROUP(Name, SSZ##Suf, Attrs) \
|
||||
FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
|
||||
|
||||
|
@ -130,14 +142,16 @@ const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
|
|||
|
||||
// FMA3 instructions have a well defined encoding pattern we can exploit.
|
||||
uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
|
||||
bool IsFMA3 = ((TSFlags & X86II::EncodingMask) == X86II::VEX ||
|
||||
(TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
|
||||
(TSFlags & X86II::OpMapMask) == X86II::T8 &&
|
||||
(TSFlags & X86II::OpPrefixMask) == X86II::PD &&
|
||||
((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
|
||||
(BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
|
||||
(BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
|
||||
if (!IsFMA3)
|
||||
bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
|
||||
(BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
|
||||
(BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
|
||||
bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
|
||||
(TSFlags & X86II::OpMapMask) == X86II::T8) ||
|
||||
((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
|
||||
((TSFlags & X86II::OpMapMask) == X86II::T8 ||
|
||||
(TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
|
||||
bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
|
||||
if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
|
||||
return nullptr;
|
||||
|
||||
verifyTables();
|
||||
|
|
|
@ -3288,6 +3288,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, 0 },
|
||||
{ X86::VFMADD132PDZr, X86::VFMADD132PDZm, 0 },
|
||||
{ X86::VFMADD132PDr, X86::VFMADD132PDm, 0 },
|
||||
{ X86::VFMADD132PHZ128r, X86::VFMADD132PHZ128m, 0 },
|
||||
{ X86::VFMADD132PHZ256r, X86::VFMADD132PHZ256m, 0 },
|
||||
{ X86::VFMADD132PHZr, X86::VFMADD132PHZm, 0 },
|
||||
{ X86::VFMADD132PSYr, X86::VFMADD132PSYm, 0 },
|
||||
{ X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128m, 0 },
|
||||
{ X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256m, 0 },
|
||||
|
@ -3297,6 +3300,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADD132SDZr_Int, X86::VFMADD132SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SDr, X86::VFMADD132SDm, 0 },
|
||||
{ X86::VFMADD132SDr_Int, X86::VFMADD132SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SHZr, X86::VFMADD132SHZm, 0 },
|
||||
{ X86::VFMADD132SHZr_Int, X86::VFMADD132SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SSZr, X86::VFMADD132SSZm, 0 },
|
||||
{ X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SSr, X86::VFMADD132SSm, 0 },
|
||||
|
@ -3306,6 +3311,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, 0 },
|
||||
{ X86::VFMADD213PDZr, X86::VFMADD213PDZm, 0 },
|
||||
{ X86::VFMADD213PDr, X86::VFMADD213PDm, 0 },
|
||||
{ X86::VFMADD213PHZ128r, X86::VFMADD213PHZ128m, 0 },
|
||||
{ X86::VFMADD213PHZ256r, X86::VFMADD213PHZ256m, 0 },
|
||||
{ X86::VFMADD213PHZr, X86::VFMADD213PHZm, 0 },
|
||||
{ X86::VFMADD213PSYr, X86::VFMADD213PSYm, 0 },
|
||||
{ X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128m, 0 },
|
||||
{ X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256m, 0 },
|
||||
|
@ -3315,6 +3323,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADD213SDZr_Int, X86::VFMADD213SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SDr, X86::VFMADD213SDm, 0 },
|
||||
{ X86::VFMADD213SDr_Int, X86::VFMADD213SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SHZr, X86::VFMADD213SHZm, 0 },
|
||||
{ X86::VFMADD213SHZr_Int, X86::VFMADD213SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SSZr, X86::VFMADD213SSZm, 0 },
|
||||
{ X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SSr, X86::VFMADD213SSm, 0 },
|
||||
|
@ -3324,6 +3334,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, 0 },
|
||||
{ X86::VFMADD231PDZr, X86::VFMADD231PDZm, 0 },
|
||||
{ X86::VFMADD231PDr, X86::VFMADD231PDm, 0 },
|
||||
{ X86::VFMADD231PHZ128r, X86::VFMADD231PHZ128m, 0 },
|
||||
{ X86::VFMADD231PHZ256r, X86::VFMADD231PHZ256m, 0 },
|
||||
{ X86::VFMADD231PHZr, X86::VFMADD231PHZm, 0 },
|
||||
{ X86::VFMADD231PSYr, X86::VFMADD231PSYm, 0 },
|
||||
{ X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128m, 0 },
|
||||
{ X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256m, 0 },
|
||||
|
@ -3333,6 +3346,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADD231SDZr_Int, X86::VFMADD231SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SDr, X86::VFMADD231SDm, 0 },
|
||||
{ X86::VFMADD231SDr_Int, X86::VFMADD231SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SHZr, X86::VFMADD231SHZm, 0 },
|
||||
{ X86::VFMADD231SHZr_Int, X86::VFMADD231SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SSZr, X86::VFMADD231SSZm, 0 },
|
||||
{ X86::VFMADD231SSZr_Int, X86::VFMADD231SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SSr, X86::VFMADD231SSm, 0 },
|
||||
|
@ -3350,6 +3365,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256m, 0 },
|
||||
{ X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZm, 0 },
|
||||
{ X86::VFMADDSUB132PDr, X86::VFMADDSUB132PDm, 0 },
|
||||
{ X86::VFMADDSUB132PHZ128r, X86::VFMADDSUB132PHZ128m, 0 },
|
||||
{ X86::VFMADDSUB132PHZ256r, X86::VFMADDSUB132PHZ256m, 0 },
|
||||
{ X86::VFMADDSUB132PHZr, X86::VFMADDSUB132PHZm, 0 },
|
||||
{ X86::VFMADDSUB132PSYr, X86::VFMADDSUB132PSYm, 0 },
|
||||
{ X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128m, 0 },
|
||||
{ X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256m, 0 },
|
||||
|
@ -3360,6 +3378,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256m, 0 },
|
||||
{ X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZm, 0 },
|
||||
{ X86::VFMADDSUB213PDr, X86::VFMADDSUB213PDm, 0 },
|
||||
{ X86::VFMADDSUB213PHZ128r, X86::VFMADDSUB213PHZ128m, 0 },
|
||||
{ X86::VFMADDSUB213PHZ256r, X86::VFMADDSUB213PHZ256m, 0 },
|
||||
{ X86::VFMADDSUB213PHZr, X86::VFMADDSUB213PHZm, 0 },
|
||||
{ X86::VFMADDSUB213PSYr, X86::VFMADDSUB213PSYm, 0 },
|
||||
{ X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128m, 0 },
|
||||
{ X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256m, 0 },
|
||||
|
@ -3370,6 +3391,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256m, 0 },
|
||||
{ X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZm, 0 },
|
||||
{ X86::VFMADDSUB231PDr, X86::VFMADDSUB231PDm, 0 },
|
||||
{ X86::VFMADDSUB231PHZ128r, X86::VFMADDSUB231PHZ128m, 0 },
|
||||
{ X86::VFMADDSUB231PHZ256r, X86::VFMADDSUB231PHZ256m, 0 },
|
||||
{ X86::VFMADDSUB231PHZr, X86::VFMADDSUB231PHZm, 0 },
|
||||
{ X86::VFMADDSUB231PSYr, X86::VFMADDSUB231PSYm, 0 },
|
||||
{ X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128m, 0 },
|
||||
{ X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256m, 0 },
|
||||
|
@ -3384,6 +3408,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, 0 },
|
||||
{ X86::VFMSUB132PDZr, X86::VFMSUB132PDZm, 0 },
|
||||
{ X86::VFMSUB132PDr, X86::VFMSUB132PDm, 0 },
|
||||
{ X86::VFMSUB132PHZ128r, X86::VFMSUB132PHZ128m, 0 },
|
||||
{ X86::VFMSUB132PHZ256r, X86::VFMSUB132PHZ256m, 0 },
|
||||
{ X86::VFMSUB132PHZr, X86::VFMSUB132PHZm, 0 },
|
||||
{ X86::VFMSUB132PSYr, X86::VFMSUB132PSYm, 0 },
|
||||
{ X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128m, 0 },
|
||||
{ X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256m, 0 },
|
||||
|
@ -3393,6 +3420,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUB132SDZr_Int, X86::VFMSUB132SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SDr, X86::VFMSUB132SDm, 0 },
|
||||
{ X86::VFMSUB132SDr_Int, X86::VFMSUB132SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SHZr, X86::VFMSUB132SHZm, 0 },
|
||||
{ X86::VFMSUB132SHZr_Int, X86::VFMSUB132SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SSZr, X86::VFMSUB132SSZm, 0 },
|
||||
{ X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SSr, X86::VFMSUB132SSm, 0 },
|
||||
|
@ -3402,6 +3431,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, 0 },
|
||||
{ X86::VFMSUB213PDZr, X86::VFMSUB213PDZm, 0 },
|
||||
{ X86::VFMSUB213PDr, X86::VFMSUB213PDm, 0 },
|
||||
{ X86::VFMSUB213PHZ128r, X86::VFMSUB213PHZ128m, 0 },
|
||||
{ X86::VFMSUB213PHZ256r, X86::VFMSUB213PHZ256m, 0 },
|
||||
{ X86::VFMSUB213PHZr, X86::VFMSUB213PHZm, 0 },
|
||||
{ X86::VFMSUB213PSYr, X86::VFMSUB213PSYm, 0 },
|
||||
{ X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128m, 0 },
|
||||
{ X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256m, 0 },
|
||||
|
@ -3411,6 +3443,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUB213SDZr_Int, X86::VFMSUB213SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SDr, X86::VFMSUB213SDm, 0 },
|
||||
{ X86::VFMSUB213SDr_Int, X86::VFMSUB213SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SHZr, X86::VFMSUB213SHZm, 0 },
|
||||
{ X86::VFMSUB213SHZr_Int, X86::VFMSUB213SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SSZr, X86::VFMSUB213SSZm, 0 },
|
||||
{ X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SSr, X86::VFMSUB213SSm, 0 },
|
||||
|
@ -3420,6 +3454,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, 0 },
|
||||
{ X86::VFMSUB231PDZr, X86::VFMSUB231PDZm, 0 },
|
||||
{ X86::VFMSUB231PDr, X86::VFMSUB231PDm, 0 },
|
||||
{ X86::VFMSUB231PHZ128r, X86::VFMSUB231PHZ128m, 0 },
|
||||
{ X86::VFMSUB231PHZ256r, X86::VFMSUB231PHZ256m, 0 },
|
||||
{ X86::VFMSUB231PHZr, X86::VFMSUB231PHZm, 0 },
|
||||
{ X86::VFMSUB231PSYr, X86::VFMSUB231PSYm, 0 },
|
||||
{ X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128m, 0 },
|
||||
{ X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256m, 0 },
|
||||
|
@ -3429,6 +3466,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUB231SDZr_Int, X86::VFMSUB231SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SDr, X86::VFMSUB231SDm, 0 },
|
||||
{ X86::VFMSUB231SDr_Int, X86::VFMSUB231SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SHZr, X86::VFMSUB231SHZm, 0 },
|
||||
{ X86::VFMSUB231SHZr_Int, X86::VFMSUB231SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SSZr, X86::VFMSUB231SSZm, 0 },
|
||||
{ X86::VFMSUB231SSZr_Int, X86::VFMSUB231SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SSr, X86::VFMSUB231SSm, 0 },
|
||||
|
@ -3438,6 +3477,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256m, 0 },
|
||||
{ X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZm, 0 },
|
||||
{ X86::VFMSUBADD132PDr, X86::VFMSUBADD132PDm, 0 },
|
||||
{ X86::VFMSUBADD132PHZ128r, X86::VFMSUBADD132PHZ128m, 0 },
|
||||
{ X86::VFMSUBADD132PHZ256r, X86::VFMSUBADD132PHZ256m, 0 },
|
||||
{ X86::VFMSUBADD132PHZr, X86::VFMSUBADD132PHZm, 0 },
|
||||
{ X86::VFMSUBADD132PSYr, X86::VFMSUBADD132PSYm, 0 },
|
||||
{ X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128m, 0 },
|
||||
{ X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256m, 0 },
|
||||
|
@ -3448,6 +3490,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256m, 0 },
|
||||
{ X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZm, 0 },
|
||||
{ X86::VFMSUBADD213PDr, X86::VFMSUBADD213PDm, 0 },
|
||||
{ X86::VFMSUBADD213PHZ128r, X86::VFMSUBADD213PHZ128m, 0 },
|
||||
{ X86::VFMSUBADD213PHZ256r, X86::VFMSUBADD213PHZ256m, 0 },
|
||||
{ X86::VFMSUBADD213PHZr, X86::VFMSUBADD213PHZm, 0 },
|
||||
{ X86::VFMSUBADD213PSYr, X86::VFMSUBADD213PSYm, 0 },
|
||||
{ X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128m, 0 },
|
||||
{ X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256m, 0 },
|
||||
|
@ -3458,6 +3503,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256m, 0 },
|
||||
{ X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZm, 0 },
|
||||
{ X86::VFMSUBADD231PDr, X86::VFMSUBADD231PDm, 0 },
|
||||
{ X86::VFMSUBADD231PHZ128r, X86::VFMSUBADD231PHZ128m, 0 },
|
||||
{ X86::VFMSUBADD231PHZ256r, X86::VFMSUBADD231PHZ256m, 0 },
|
||||
{ X86::VFMSUBADD231PHZr, X86::VFMSUBADD231PHZm, 0 },
|
||||
{ X86::VFMSUBADD231PSYr, X86::VFMSUBADD231PSYm, 0 },
|
||||
{ X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128m, 0 },
|
||||
{ X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256m, 0 },
|
||||
|
@ -3480,6 +3528,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, 0 },
|
||||
{ X86::VFNMADD132PDZr, X86::VFNMADD132PDZm, 0 },
|
||||
{ X86::VFNMADD132PDr, X86::VFNMADD132PDm, 0 },
|
||||
{ X86::VFNMADD132PHZ128r, X86::VFNMADD132PHZ128m, 0 },
|
||||
{ X86::VFNMADD132PHZ256r, X86::VFNMADD132PHZ256m, 0 },
|
||||
{ X86::VFNMADD132PHZr, X86::VFNMADD132PHZm, 0 },
|
||||
{ X86::VFNMADD132PSYr, X86::VFNMADD132PSYm, 0 },
|
||||
{ X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128m, 0 },
|
||||
{ X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256m, 0 },
|
||||
|
@ -3489,6 +3540,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMADD132SDZr_Int, X86::VFNMADD132SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SDr, X86::VFNMADD132SDm, 0 },
|
||||
{ X86::VFNMADD132SDr_Int, X86::VFNMADD132SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SHZr, X86::VFNMADD132SHZm, 0 },
|
||||
{ X86::VFNMADD132SHZr_Int, X86::VFNMADD132SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SSZr, X86::VFNMADD132SSZm, 0 },
|
||||
{ X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SSr, X86::VFNMADD132SSm, 0 },
|
||||
|
@ -3498,6 +3551,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, 0 },
|
||||
{ X86::VFNMADD213PDZr, X86::VFNMADD213PDZm, 0 },
|
||||
{ X86::VFNMADD213PDr, X86::VFNMADD213PDm, 0 },
|
||||
{ X86::VFNMADD213PHZ128r, X86::VFNMADD213PHZ128m, 0 },
|
||||
{ X86::VFNMADD213PHZ256r, X86::VFNMADD213PHZ256m, 0 },
|
||||
{ X86::VFNMADD213PHZr, X86::VFNMADD213PHZm, 0 },
|
||||
{ X86::VFNMADD213PSYr, X86::VFNMADD213PSYm, 0 },
|
||||
{ X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128m, 0 },
|
||||
{ X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256m, 0 },
|
||||
|
@ -3507,6 +3563,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMADD213SDZr_Int, X86::VFNMADD213SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SDr, X86::VFNMADD213SDm, 0 },
|
||||
{ X86::VFNMADD213SDr_Int, X86::VFNMADD213SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SHZr, X86::VFNMADD213SHZm, 0 },
|
||||
{ X86::VFNMADD213SHZr_Int, X86::VFNMADD213SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SSZr, X86::VFNMADD213SSZm, 0 },
|
||||
{ X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SSr, X86::VFNMADD213SSm, 0 },
|
||||
|
@ -3516,6 +3574,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, 0 },
|
||||
{ X86::VFNMADD231PDZr, X86::VFNMADD231PDZm, 0 },
|
||||
{ X86::VFNMADD231PDr, X86::VFNMADD231PDm, 0 },
|
||||
{ X86::VFNMADD231PHZ128r, X86::VFNMADD231PHZ128m, 0 },
|
||||
{ X86::VFNMADD231PHZ256r, X86::VFNMADD231PHZ256m, 0 },
|
||||
{ X86::VFNMADD231PHZr, X86::VFNMADD231PHZm, 0 },
|
||||
{ X86::VFNMADD231PSYr, X86::VFNMADD231PSYm, 0 },
|
||||
{ X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128m, 0 },
|
||||
{ X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256m, 0 },
|
||||
|
@ -3525,6 +3586,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMADD231SDZr_Int, X86::VFNMADD231SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SDr, X86::VFNMADD231SDm, 0 },
|
||||
{ X86::VFNMADD231SDr_Int, X86::VFNMADD231SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SHZr, X86::VFNMADD231SHZm, 0 },
|
||||
{ X86::VFNMADD231SHZr_Int, X86::VFNMADD231SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SSZr, X86::VFNMADD231SSZm, 0 },
|
||||
{ X86::VFNMADD231SSZr_Int, X86::VFNMADD231SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SSr, X86::VFNMADD231SSm, 0 },
|
||||
|
@ -3542,6 +3605,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, 0 },
|
||||
{ X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZm, 0 },
|
||||
{ X86::VFNMSUB132PDr, X86::VFNMSUB132PDm, 0 },
|
||||
{ X86::VFNMSUB132PHZ128r, X86::VFNMSUB132PHZ128m, 0 },
|
||||
{ X86::VFNMSUB132PHZ256r, X86::VFNMSUB132PHZ256m, 0 },
|
||||
{ X86::VFNMSUB132PHZr, X86::VFNMSUB132PHZm, 0 },
|
||||
{ X86::VFNMSUB132PSYr, X86::VFNMSUB132PSYm, 0 },
|
||||
{ X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128m, 0 },
|
||||
{ X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256m, 0 },
|
||||
|
@ -3551,6 +3617,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMSUB132SDZr_Int, X86::VFNMSUB132SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SDr, X86::VFNMSUB132SDm, 0 },
|
||||
{ X86::VFNMSUB132SDr_Int, X86::VFNMSUB132SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SHZr, X86::VFNMSUB132SHZm, 0 },
|
||||
{ X86::VFNMSUB132SHZr_Int, X86::VFNMSUB132SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SSZr, X86::VFNMSUB132SSZm, 0 },
|
||||
{ X86::VFNMSUB132SSZr_Int, X86::VFNMSUB132SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, 0 },
|
||||
|
@ -3560,6 +3628,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, 0 },
|
||||
{ X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZm, 0 },
|
||||
{ X86::VFNMSUB213PDr, X86::VFNMSUB213PDm, 0 },
|
||||
{ X86::VFNMSUB213PHZ128r, X86::VFNMSUB213PHZ128m, 0 },
|
||||
{ X86::VFNMSUB213PHZ256r, X86::VFNMSUB213PHZ256m, 0 },
|
||||
{ X86::VFNMSUB213PHZr, X86::VFNMSUB213PHZm, 0 },
|
||||
{ X86::VFNMSUB213PSYr, X86::VFNMSUB213PSYm, 0 },
|
||||
{ X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128m, 0 },
|
||||
{ X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256m, 0 },
|
||||
|
@ -3569,6 +3640,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMSUB213SDZr_Int, X86::VFNMSUB213SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SDr, X86::VFNMSUB213SDm, 0 },
|
||||
{ X86::VFNMSUB213SDr_Int, X86::VFNMSUB213SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SHZr, X86::VFNMSUB213SHZm, 0 },
|
||||
{ X86::VFNMSUB213SHZr_Int, X86::VFNMSUB213SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SSZr, X86::VFNMSUB213SSZm, 0 },
|
||||
{ X86::VFNMSUB213SSZr_Int, X86::VFNMSUB213SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, 0 },
|
||||
|
@ -3578,6 +3651,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, 0 },
|
||||
{ X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZm, 0 },
|
||||
{ X86::VFNMSUB231PDr, X86::VFNMSUB231PDm, 0 },
|
||||
{ X86::VFNMSUB231PHZ128r, X86::VFNMSUB231PHZ128m, 0 },
|
||||
{ X86::VFNMSUB231PHZ256r, X86::VFNMSUB231PHZ256m, 0 },
|
||||
{ X86::VFNMSUB231PHZr, X86::VFNMSUB231PHZm, 0 },
|
||||
{ X86::VFNMSUB231PSYr, X86::VFNMSUB231PSYm, 0 },
|
||||
{ X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128m, 0 },
|
||||
{ X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256m, 0 },
|
||||
|
@ -3587,6 +3663,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
|
|||
{ X86::VFNMSUB231SDZr_Int, X86::VFNMSUB231SDZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SDr, X86::VFNMSUB231SDm, 0 },
|
||||
{ X86::VFNMSUB231SDr_Int, X86::VFNMSUB231SDm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SHZr, X86::VFNMSUB231SHZm, 0 },
|
||||
{ X86::VFNMSUB231SHZr_Int, X86::VFNMSUB231SHZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SSZr, X86::VFNMSUB231SSZm, 0 },
|
||||
{ X86::VFNMSUB231SSZr_Int, X86::VFNMSUB231SSZm_Int, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SSr, X86::VFNMSUB231SSm, 0 },
|
||||
|
@ -4599,6 +4677,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADD132PDZ256rkz, X86::VFMADD132PDZ256mkz, 0 },
|
||||
{ X86::VFMADD132PDZrk, X86::VFMADD132PDZmk, 0 },
|
||||
{ X86::VFMADD132PDZrkz, X86::VFMADD132PDZmkz, 0 },
|
||||
{ X86::VFMADD132PHZ128rk, X86::VFMADD132PHZ128mk, 0 },
|
||||
{ X86::VFMADD132PHZ128rkz, X86::VFMADD132PHZ128mkz, 0 },
|
||||
{ X86::VFMADD132PHZ256rk, X86::VFMADD132PHZ256mk, 0 },
|
||||
{ X86::VFMADD132PHZ256rkz, X86::VFMADD132PHZ256mkz, 0 },
|
||||
{ X86::VFMADD132PHZrk, X86::VFMADD132PHZmk, 0 },
|
||||
{ X86::VFMADD132PHZrkz, X86::VFMADD132PHZmkz, 0 },
|
||||
{ X86::VFMADD132PSZ128rk, X86::VFMADD132PSZ128mk, 0 },
|
||||
{ X86::VFMADD132PSZ128rkz, X86::VFMADD132PSZ128mkz, 0 },
|
||||
{ X86::VFMADD132PSZ256rk, X86::VFMADD132PSZ256mk, 0 },
|
||||
|
@ -4607,6 +4691,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADD132PSZrkz, X86::VFMADD132PSZmkz, 0 },
|
||||
{ X86::VFMADD132SDZr_Intk, X86::VFMADD132SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SDZr_Intkz, X86::VFMADD132SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SHZr_Intk, X86::VFMADD132SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SHZr_Intkz, X86::VFMADD132SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SSZr_Intk, X86::VFMADD132SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD132SSZr_Intkz, X86::VFMADD132SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mk, 0 },
|
||||
|
@ -4615,6 +4701,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADD213PDZ256rkz, X86::VFMADD213PDZ256mkz, 0 },
|
||||
{ X86::VFMADD213PDZrk, X86::VFMADD213PDZmk, 0 },
|
||||
{ X86::VFMADD213PDZrkz, X86::VFMADD213PDZmkz, 0 },
|
||||
{ X86::VFMADD213PHZ128rk, X86::VFMADD213PHZ128mk, 0 },
|
||||
{ X86::VFMADD213PHZ128rkz, X86::VFMADD213PHZ128mkz, 0 },
|
||||
{ X86::VFMADD213PHZ256rk, X86::VFMADD213PHZ256mk, 0 },
|
||||
{ X86::VFMADD213PHZ256rkz, X86::VFMADD213PHZ256mkz, 0 },
|
||||
{ X86::VFMADD213PHZrk, X86::VFMADD213PHZmk, 0 },
|
||||
{ X86::VFMADD213PHZrkz, X86::VFMADD213PHZmkz, 0 },
|
||||
{ X86::VFMADD213PSZ128rk, X86::VFMADD213PSZ128mk, 0 },
|
||||
{ X86::VFMADD213PSZ128rkz, X86::VFMADD213PSZ128mkz, 0 },
|
||||
{ X86::VFMADD213PSZ256rk, X86::VFMADD213PSZ256mk, 0 },
|
||||
|
@ -4623,6 +4715,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADD213PSZrkz, X86::VFMADD213PSZmkz, 0 },
|
||||
{ X86::VFMADD213SDZr_Intk, X86::VFMADD213SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SDZr_Intkz, X86::VFMADD213SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SHZr_Intk, X86::VFMADD213SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SHZr_Intkz, X86::VFMADD213SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SSZr_Intk, X86::VFMADD213SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD213SSZr_Intkz, X86::VFMADD213SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mk, 0 },
|
||||
|
@ -4631,6 +4725,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADD231PDZ256rkz, X86::VFMADD231PDZ256mkz, 0 },
|
||||
{ X86::VFMADD231PDZrk, X86::VFMADD231PDZmk, 0 },
|
||||
{ X86::VFMADD231PDZrkz, X86::VFMADD231PDZmkz, 0 },
|
||||
{ X86::VFMADD231PHZ128rk, X86::VFMADD231PHZ128mk, 0 },
|
||||
{ X86::VFMADD231PHZ128rkz, X86::VFMADD231PHZ128mkz, 0 },
|
||||
{ X86::VFMADD231PHZ256rk, X86::VFMADD231PHZ256mk, 0 },
|
||||
{ X86::VFMADD231PHZ256rkz, X86::VFMADD231PHZ256mkz, 0 },
|
||||
{ X86::VFMADD231PHZrk, X86::VFMADD231PHZmk, 0 },
|
||||
{ X86::VFMADD231PHZrkz, X86::VFMADD231PHZmkz, 0 },
|
||||
{ X86::VFMADD231PSZ128rk, X86::VFMADD231PSZ128mk, 0 },
|
||||
{ X86::VFMADD231PSZ128rkz, X86::VFMADD231PSZ128mkz, 0 },
|
||||
{ X86::VFMADD231PSZ256rk, X86::VFMADD231PSZ256mk, 0 },
|
||||
|
@ -4639,6 +4739,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADD231PSZrkz, X86::VFMADD231PSZmkz, 0 },
|
||||
{ X86::VFMADD231SDZr_Intk, X86::VFMADD231SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SDZr_Intkz, X86::VFMADD231SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SHZr_Intk, X86::VFMADD231SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SHZr_Intkz, X86::VFMADD231SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SSZr_Intk, X86::VFMADD231SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMADD231SSZr_Intkz, X86::VFMADD231SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMADDSUB132PDZ128rk, X86::VFMADDSUB132PDZ128mk, 0 },
|
||||
|
@ -4647,6 +4749,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADDSUB132PDZ256rkz, X86::VFMADDSUB132PDZ256mkz, 0 },
|
||||
{ X86::VFMADDSUB132PDZrk, X86::VFMADDSUB132PDZmk, 0 },
|
||||
{ X86::VFMADDSUB132PDZrkz, X86::VFMADDSUB132PDZmkz, 0 },
|
||||
{ X86::VFMADDSUB132PHZ128rk, X86::VFMADDSUB132PHZ128mk, 0 },
|
||||
{ X86::VFMADDSUB132PHZ128rkz, X86::VFMADDSUB132PHZ128mkz, 0 },
|
||||
{ X86::VFMADDSUB132PHZ256rk, X86::VFMADDSUB132PHZ256mk, 0 },
|
||||
{ X86::VFMADDSUB132PHZ256rkz, X86::VFMADDSUB132PHZ256mkz, 0 },
|
||||
{ X86::VFMADDSUB132PHZrk, X86::VFMADDSUB132PHZmk, 0 },
|
||||
{ X86::VFMADDSUB132PHZrkz, X86::VFMADDSUB132PHZmkz, 0 },
|
||||
{ X86::VFMADDSUB132PSZ128rk, X86::VFMADDSUB132PSZ128mk, 0 },
|
||||
{ X86::VFMADDSUB132PSZ128rkz, X86::VFMADDSUB132PSZ128mkz, 0 },
|
||||
{ X86::VFMADDSUB132PSZ256rk, X86::VFMADDSUB132PSZ256mk, 0 },
|
||||
|
@ -4659,6 +4767,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADDSUB213PDZ256rkz, X86::VFMADDSUB213PDZ256mkz, 0 },
|
||||
{ X86::VFMADDSUB213PDZrk, X86::VFMADDSUB213PDZmk, 0 },
|
||||
{ X86::VFMADDSUB213PDZrkz, X86::VFMADDSUB213PDZmkz, 0 },
|
||||
{ X86::VFMADDSUB213PHZ128rk, X86::VFMADDSUB213PHZ128mk, 0 },
|
||||
{ X86::VFMADDSUB213PHZ128rkz, X86::VFMADDSUB213PHZ128mkz, 0 },
|
||||
{ X86::VFMADDSUB213PHZ256rk, X86::VFMADDSUB213PHZ256mk, 0 },
|
||||
{ X86::VFMADDSUB213PHZ256rkz, X86::VFMADDSUB213PHZ256mkz, 0 },
|
||||
{ X86::VFMADDSUB213PHZrk, X86::VFMADDSUB213PHZmk, 0 },
|
||||
{ X86::VFMADDSUB213PHZrkz, X86::VFMADDSUB213PHZmkz, 0 },
|
||||
{ X86::VFMADDSUB213PSZ128rk, X86::VFMADDSUB213PSZ128mk, 0 },
|
||||
{ X86::VFMADDSUB213PSZ128rkz, X86::VFMADDSUB213PSZ128mkz, 0 },
|
||||
{ X86::VFMADDSUB213PSZ256rk, X86::VFMADDSUB213PSZ256mk, 0 },
|
||||
|
@ -4671,6 +4785,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMADDSUB231PDZ256rkz, X86::VFMADDSUB231PDZ256mkz, 0 },
|
||||
{ X86::VFMADDSUB231PDZrk, X86::VFMADDSUB231PDZmk, 0 },
|
||||
{ X86::VFMADDSUB231PDZrkz, X86::VFMADDSUB231PDZmkz, 0 },
|
||||
{ X86::VFMADDSUB231PHZ128rk, X86::VFMADDSUB231PHZ128mk, 0 },
|
||||
{ X86::VFMADDSUB231PHZ128rkz, X86::VFMADDSUB231PHZ128mkz, 0 },
|
||||
{ X86::VFMADDSUB231PHZ256rk, X86::VFMADDSUB231PHZ256mk, 0 },
|
||||
{ X86::VFMADDSUB231PHZ256rkz, X86::VFMADDSUB231PHZ256mkz, 0 },
|
||||
{ X86::VFMADDSUB231PHZrk, X86::VFMADDSUB231PHZmk, 0 },
|
||||
{ X86::VFMADDSUB231PHZrkz, X86::VFMADDSUB231PHZmkz, 0 },
|
||||
{ X86::VFMADDSUB231PSZ128rk, X86::VFMADDSUB231PSZ128mk, 0 },
|
||||
{ X86::VFMADDSUB231PSZ128rkz, X86::VFMADDSUB231PSZ128mkz, 0 },
|
||||
{ X86::VFMADDSUB231PSZ256rk, X86::VFMADDSUB231PSZ256mk, 0 },
|
||||
|
@ -4683,6 +4803,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUB132PDZ256rkz, X86::VFMSUB132PDZ256mkz, 0 },
|
||||
{ X86::VFMSUB132PDZrk, X86::VFMSUB132PDZmk, 0 },
|
||||
{ X86::VFMSUB132PDZrkz, X86::VFMSUB132PDZmkz, 0 },
|
||||
{ X86::VFMSUB132PHZ128rk, X86::VFMSUB132PHZ128mk, 0 },
|
||||
{ X86::VFMSUB132PHZ128rkz, X86::VFMSUB132PHZ128mkz, 0 },
|
||||
{ X86::VFMSUB132PHZ256rk, X86::VFMSUB132PHZ256mk, 0 },
|
||||
{ X86::VFMSUB132PHZ256rkz, X86::VFMSUB132PHZ256mkz, 0 },
|
||||
{ X86::VFMSUB132PHZrk, X86::VFMSUB132PHZmk, 0 },
|
||||
{ X86::VFMSUB132PHZrkz, X86::VFMSUB132PHZmkz, 0 },
|
||||
{ X86::VFMSUB132PSZ128rk, X86::VFMSUB132PSZ128mk, 0 },
|
||||
{ X86::VFMSUB132PSZ128rkz, X86::VFMSUB132PSZ128mkz, 0 },
|
||||
{ X86::VFMSUB132PSZ256rk, X86::VFMSUB132PSZ256mk, 0 },
|
||||
|
@ -4691,6 +4817,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUB132PSZrkz, X86::VFMSUB132PSZmkz, 0 },
|
||||
{ X86::VFMSUB132SDZr_Intk, X86::VFMSUB132SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SDZr_Intkz, X86::VFMSUB132SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SHZr_Intk, X86::VFMSUB132SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SHZr_Intkz, X86::VFMSUB132SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SSZr_Intk, X86::VFMSUB132SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB132SSZr_Intkz, X86::VFMSUB132SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mk, 0 },
|
||||
|
@ -4699,6 +4827,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUB213PDZ256rkz, X86::VFMSUB213PDZ256mkz, 0 },
|
||||
{ X86::VFMSUB213PDZrk, X86::VFMSUB213PDZmk, 0 },
|
||||
{ X86::VFMSUB213PDZrkz, X86::VFMSUB213PDZmkz, 0 },
|
||||
{ X86::VFMSUB213PHZ128rk, X86::VFMSUB213PHZ128mk, 0 },
|
||||
{ X86::VFMSUB213PHZ128rkz, X86::VFMSUB213PHZ128mkz, 0 },
|
||||
{ X86::VFMSUB213PHZ256rk, X86::VFMSUB213PHZ256mk, 0 },
|
||||
{ X86::VFMSUB213PHZ256rkz, X86::VFMSUB213PHZ256mkz, 0 },
|
||||
{ X86::VFMSUB213PHZrk, X86::VFMSUB213PHZmk, 0 },
|
||||
{ X86::VFMSUB213PHZrkz, X86::VFMSUB213PHZmkz, 0 },
|
||||
{ X86::VFMSUB213PSZ128rk, X86::VFMSUB213PSZ128mk, 0 },
|
||||
{ X86::VFMSUB213PSZ128rkz, X86::VFMSUB213PSZ128mkz, 0 },
|
||||
{ X86::VFMSUB213PSZ256rk, X86::VFMSUB213PSZ256mk, 0 },
|
||||
|
@ -4707,6 +4841,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUB213PSZrkz, X86::VFMSUB213PSZmkz, 0 },
|
||||
{ X86::VFMSUB213SDZr_Intk, X86::VFMSUB213SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SDZr_Intkz, X86::VFMSUB213SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SHZr_Intk, X86::VFMSUB213SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SHZr_Intkz, X86::VFMSUB213SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SSZr_Intk, X86::VFMSUB213SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB213SSZr_Intkz, X86::VFMSUB213SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mk, 0 },
|
||||
|
@ -4715,6 +4851,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUB231PDZ256rkz, X86::VFMSUB231PDZ256mkz, 0 },
|
||||
{ X86::VFMSUB231PDZrk, X86::VFMSUB231PDZmk, 0 },
|
||||
{ X86::VFMSUB231PDZrkz, X86::VFMSUB231PDZmkz, 0 },
|
||||
{ X86::VFMSUB231PHZ128rk, X86::VFMSUB231PHZ128mk, 0 },
|
||||
{ X86::VFMSUB231PHZ128rkz, X86::VFMSUB231PHZ128mkz, 0 },
|
||||
{ X86::VFMSUB231PHZ256rk, X86::VFMSUB231PHZ256mk, 0 },
|
||||
{ X86::VFMSUB231PHZ256rkz, X86::VFMSUB231PHZ256mkz, 0 },
|
||||
{ X86::VFMSUB231PHZrk, X86::VFMSUB231PHZmk, 0 },
|
||||
{ X86::VFMSUB231PHZrkz, X86::VFMSUB231PHZmkz, 0 },
|
||||
{ X86::VFMSUB231PSZ128rk, X86::VFMSUB231PSZ128mk, 0 },
|
||||
{ X86::VFMSUB231PSZ128rkz, X86::VFMSUB231PSZ128mkz, 0 },
|
||||
{ X86::VFMSUB231PSZ256rk, X86::VFMSUB231PSZ256mk, 0 },
|
||||
|
@ -4723,6 +4865,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUB231PSZrkz, X86::VFMSUB231PSZmkz, 0 },
|
||||
{ X86::VFMSUB231SDZr_Intk, X86::VFMSUB231SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SDZr_Intkz, X86::VFMSUB231SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SHZr_Intk, X86::VFMSUB231SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SHZr_Intkz, X86::VFMSUB231SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SSZr_Intk, X86::VFMSUB231SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFMSUB231SSZr_Intkz, X86::VFMSUB231SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFMSUBADD132PDZ128rk, X86::VFMSUBADD132PDZ128mk, 0 },
|
||||
|
@ -4731,6 +4875,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUBADD132PDZ256rkz, X86::VFMSUBADD132PDZ256mkz, 0 },
|
||||
{ X86::VFMSUBADD132PDZrk, X86::VFMSUBADD132PDZmk, 0 },
|
||||
{ X86::VFMSUBADD132PDZrkz, X86::VFMSUBADD132PDZmkz, 0 },
|
||||
{ X86::VFMSUBADD132PHZ128rk, X86::VFMSUBADD132PHZ128mk, 0 },
|
||||
{ X86::VFMSUBADD132PHZ128rkz, X86::VFMSUBADD132PHZ128mkz, 0 },
|
||||
{ X86::VFMSUBADD132PHZ256rk, X86::VFMSUBADD132PHZ256mk, 0 },
|
||||
{ X86::VFMSUBADD132PHZ256rkz, X86::VFMSUBADD132PHZ256mkz, 0 },
|
||||
{ X86::VFMSUBADD132PHZrk, X86::VFMSUBADD132PHZmk, 0 },
|
||||
{ X86::VFMSUBADD132PHZrkz, X86::VFMSUBADD132PHZmkz, 0 },
|
||||
{ X86::VFMSUBADD132PSZ128rk, X86::VFMSUBADD132PSZ128mk, 0 },
|
||||
{ X86::VFMSUBADD132PSZ128rkz, X86::VFMSUBADD132PSZ128mkz, 0 },
|
||||
{ X86::VFMSUBADD132PSZ256rk, X86::VFMSUBADD132PSZ256mk, 0 },
|
||||
|
@ -4743,6 +4893,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUBADD213PDZ256rkz, X86::VFMSUBADD213PDZ256mkz, 0 },
|
||||
{ X86::VFMSUBADD213PDZrk, X86::VFMSUBADD213PDZmk, 0 },
|
||||
{ X86::VFMSUBADD213PDZrkz, X86::VFMSUBADD213PDZmkz, 0 },
|
||||
{ X86::VFMSUBADD213PHZ128rk, X86::VFMSUBADD213PHZ128mk, 0 },
|
||||
{ X86::VFMSUBADD213PHZ128rkz, X86::VFMSUBADD213PHZ128mkz, 0 },
|
||||
{ X86::VFMSUBADD213PHZ256rk, X86::VFMSUBADD213PHZ256mk, 0 },
|
||||
{ X86::VFMSUBADD213PHZ256rkz, X86::VFMSUBADD213PHZ256mkz, 0 },
|
||||
{ X86::VFMSUBADD213PHZrk, X86::VFMSUBADD213PHZmk, 0 },
|
||||
{ X86::VFMSUBADD213PHZrkz, X86::VFMSUBADD213PHZmkz, 0 },
|
||||
{ X86::VFMSUBADD213PSZ128rk, X86::VFMSUBADD213PSZ128mk, 0 },
|
||||
{ X86::VFMSUBADD213PSZ128rkz, X86::VFMSUBADD213PSZ128mkz, 0 },
|
||||
{ X86::VFMSUBADD213PSZ256rk, X86::VFMSUBADD213PSZ256mk, 0 },
|
||||
|
@ -4755,6 +4911,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFMSUBADD231PDZ256rkz, X86::VFMSUBADD231PDZ256mkz, 0 },
|
||||
{ X86::VFMSUBADD231PDZrk, X86::VFMSUBADD231PDZmk, 0 },
|
||||
{ X86::VFMSUBADD231PDZrkz, X86::VFMSUBADD231PDZmkz, 0 },
|
||||
{ X86::VFMSUBADD231PHZ128rk, X86::VFMSUBADD231PHZ128mk, 0 },
|
||||
{ X86::VFMSUBADD231PHZ128rkz, X86::VFMSUBADD231PHZ128mkz, 0 },
|
||||
{ X86::VFMSUBADD231PHZ256rk, X86::VFMSUBADD231PHZ256mk, 0 },
|
||||
{ X86::VFMSUBADD231PHZ256rkz, X86::VFMSUBADD231PHZ256mkz, 0 },
|
||||
{ X86::VFMSUBADD231PHZrk, X86::VFMSUBADD231PHZmk, 0 },
|
||||
{ X86::VFMSUBADD231PHZrkz, X86::VFMSUBADD231PHZmkz, 0 },
|
||||
{ X86::VFMSUBADD231PSZ128rk, X86::VFMSUBADD231PSZ128mk, 0 },
|
||||
{ X86::VFMSUBADD231PSZ128rkz, X86::VFMSUBADD231PSZ128mkz, 0 },
|
||||
{ X86::VFMSUBADD231PSZ256rk, X86::VFMSUBADD231PSZ256mk, 0 },
|
||||
|
@ -4767,6 +4929,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMADD132PDZ256rkz, X86::VFNMADD132PDZ256mkz, 0 },
|
||||
{ X86::VFNMADD132PDZrk, X86::VFNMADD132PDZmk, 0 },
|
||||
{ X86::VFNMADD132PDZrkz, X86::VFNMADD132PDZmkz, 0 },
|
||||
{ X86::VFNMADD132PHZ128rk, X86::VFNMADD132PHZ128mk, 0 },
|
||||
{ X86::VFNMADD132PHZ128rkz, X86::VFNMADD132PHZ128mkz, 0 },
|
||||
{ X86::VFNMADD132PHZ256rk, X86::VFNMADD132PHZ256mk, 0 },
|
||||
{ X86::VFNMADD132PHZ256rkz, X86::VFNMADD132PHZ256mkz, 0 },
|
||||
{ X86::VFNMADD132PHZrk, X86::VFNMADD132PHZmk, 0 },
|
||||
{ X86::VFNMADD132PHZrkz, X86::VFNMADD132PHZmkz, 0 },
|
||||
{ X86::VFNMADD132PSZ128rk, X86::VFNMADD132PSZ128mk, 0 },
|
||||
{ X86::VFNMADD132PSZ128rkz, X86::VFNMADD132PSZ128mkz, 0 },
|
||||
{ X86::VFNMADD132PSZ256rk, X86::VFNMADD132PSZ256mk, 0 },
|
||||
|
@ -4775,6 +4943,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMADD132PSZrkz, X86::VFNMADD132PSZmkz, 0 },
|
||||
{ X86::VFNMADD132SDZr_Intk, X86::VFNMADD132SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SDZr_Intkz, X86::VFNMADD132SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SHZr_Intk, X86::VFNMADD132SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SHZr_Intkz, X86::VFNMADD132SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SSZr_Intk, X86::VFNMADD132SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD132SSZr_Intkz, X86::VFNMADD132SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mk, 0 },
|
||||
|
@ -4783,6 +4953,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMADD213PDZ256rkz, X86::VFNMADD213PDZ256mkz, 0 },
|
||||
{ X86::VFNMADD213PDZrk, X86::VFNMADD213PDZmk, 0 },
|
||||
{ X86::VFNMADD213PDZrkz, X86::VFNMADD213PDZmkz, 0 },
|
||||
{ X86::VFNMADD213PHZ128rk, X86::VFNMADD213PHZ128mk, 0 },
|
||||
{ X86::VFNMADD213PHZ128rkz, X86::VFNMADD213PHZ128mkz, 0 },
|
||||
{ X86::VFNMADD213PHZ256rk, X86::VFNMADD213PHZ256mk, 0 },
|
||||
{ X86::VFNMADD213PHZ256rkz, X86::VFNMADD213PHZ256mkz, 0 },
|
||||
{ X86::VFNMADD213PHZrk, X86::VFNMADD213PHZmk, 0 },
|
||||
{ X86::VFNMADD213PHZrkz, X86::VFNMADD213PHZmkz, 0 },
|
||||
{ X86::VFNMADD213PSZ128rk, X86::VFNMADD213PSZ128mk, 0 },
|
||||
{ X86::VFNMADD213PSZ128rkz, X86::VFNMADD213PSZ128mkz, 0 },
|
||||
{ X86::VFNMADD213PSZ256rk, X86::VFNMADD213PSZ256mk, 0 },
|
||||
|
@ -4791,6 +4967,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMADD213PSZrkz, X86::VFNMADD213PSZmkz, 0 },
|
||||
{ X86::VFNMADD213SDZr_Intk, X86::VFNMADD213SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SDZr_Intkz, X86::VFNMADD213SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SHZr_Intk, X86::VFNMADD213SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SHZr_Intkz, X86::VFNMADD213SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SSZr_Intk, X86::VFNMADD213SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD213SSZr_Intkz, X86::VFNMADD213SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mk, 0 },
|
||||
|
@ -4799,6 +4977,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMADD231PDZ256rkz, X86::VFNMADD231PDZ256mkz, 0 },
|
||||
{ X86::VFNMADD231PDZrk, X86::VFNMADD231PDZmk, 0 },
|
||||
{ X86::VFNMADD231PDZrkz, X86::VFNMADD231PDZmkz, 0 },
|
||||
{ X86::VFNMADD231PHZ128rk, X86::VFNMADD231PHZ128mk, 0 },
|
||||
{ X86::VFNMADD231PHZ128rkz, X86::VFNMADD231PHZ128mkz, 0 },
|
||||
{ X86::VFNMADD231PHZ256rk, X86::VFNMADD231PHZ256mk, 0 },
|
||||
{ X86::VFNMADD231PHZ256rkz, X86::VFNMADD231PHZ256mkz, 0 },
|
||||
{ X86::VFNMADD231PHZrk, X86::VFNMADD231PHZmk, 0 },
|
||||
{ X86::VFNMADD231PHZrkz, X86::VFNMADD231PHZmkz, 0 },
|
||||
{ X86::VFNMADD231PSZ128rk, X86::VFNMADD231PSZ128mk, 0 },
|
||||
{ X86::VFNMADD231PSZ128rkz, X86::VFNMADD231PSZ128mkz, 0 },
|
||||
{ X86::VFNMADD231PSZ256rk, X86::VFNMADD231PSZ256mk, 0 },
|
||||
|
@ -4807,6 +4991,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMADD231PSZrkz, X86::VFNMADD231PSZmkz, 0 },
|
||||
{ X86::VFNMADD231SDZr_Intk, X86::VFNMADD231SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SDZr_Intkz, X86::VFNMADD231SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SHZr_Intk, X86::VFNMADD231SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SHZr_Intkz, X86::VFNMADD231SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SSZr_Intk, X86::VFNMADD231SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMADD231SSZr_Intkz, X86::VFNMADD231SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mk, 0 },
|
||||
|
@ -4815,6 +5001,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMSUB132PDZ256rkz, X86::VFNMSUB132PDZ256mkz, 0 },
|
||||
{ X86::VFNMSUB132PDZrk, X86::VFNMSUB132PDZmk, 0 },
|
||||
{ X86::VFNMSUB132PDZrkz, X86::VFNMSUB132PDZmkz, 0 },
|
||||
{ X86::VFNMSUB132PHZ128rk, X86::VFNMSUB132PHZ128mk, 0 },
|
||||
{ X86::VFNMSUB132PHZ128rkz, X86::VFNMSUB132PHZ128mkz, 0 },
|
||||
{ X86::VFNMSUB132PHZ256rk, X86::VFNMSUB132PHZ256mk, 0 },
|
||||
{ X86::VFNMSUB132PHZ256rkz, X86::VFNMSUB132PHZ256mkz, 0 },
|
||||
{ X86::VFNMSUB132PHZrk, X86::VFNMSUB132PHZmk, 0 },
|
||||
{ X86::VFNMSUB132PHZrkz, X86::VFNMSUB132PHZmkz, 0 },
|
||||
{ X86::VFNMSUB132PSZ128rk, X86::VFNMSUB132PSZ128mk, 0 },
|
||||
{ X86::VFNMSUB132PSZ128rkz, X86::VFNMSUB132PSZ128mkz, 0 },
|
||||
{ X86::VFNMSUB132PSZ256rk, X86::VFNMSUB132PSZ256mk, 0 },
|
||||
|
@ -4823,6 +5015,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMSUB132PSZrkz, X86::VFNMSUB132PSZmkz, 0 },
|
||||
{ X86::VFNMSUB132SDZr_Intk, X86::VFNMSUB132SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SDZr_Intkz, X86::VFNMSUB132SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SHZr_Intk, X86::VFNMSUB132SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SHZr_Intkz, X86::VFNMSUB132SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SSZr_Intk, X86::VFNMSUB132SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB132SSZr_Intkz, X86::VFNMSUB132SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mk, 0 },
|
||||
|
@ -4831,6 +5025,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMSUB213PDZ256rkz, X86::VFNMSUB213PDZ256mkz, 0 },
|
||||
{ X86::VFNMSUB213PDZrk, X86::VFNMSUB213PDZmk, 0 },
|
||||
{ X86::VFNMSUB213PDZrkz, X86::VFNMSUB213PDZmkz, 0 },
|
||||
{ X86::VFNMSUB213PHZ128rk, X86::VFNMSUB213PHZ128mk, 0 },
|
||||
{ X86::VFNMSUB213PHZ128rkz, X86::VFNMSUB213PHZ128mkz, 0 },
|
||||
{ X86::VFNMSUB213PHZ256rk, X86::VFNMSUB213PHZ256mk, 0 },
|
||||
{ X86::VFNMSUB213PHZ256rkz, X86::VFNMSUB213PHZ256mkz, 0 },
|
||||
{ X86::VFNMSUB213PHZrk, X86::VFNMSUB213PHZmk, 0 },
|
||||
{ X86::VFNMSUB213PHZrkz, X86::VFNMSUB213PHZmkz, 0 },
|
||||
{ X86::VFNMSUB213PSZ128rk, X86::VFNMSUB213PSZ128mk, 0 },
|
||||
{ X86::VFNMSUB213PSZ128rkz, X86::VFNMSUB213PSZ128mkz, 0 },
|
||||
{ X86::VFNMSUB213PSZ256rk, X86::VFNMSUB213PSZ256mk, 0 },
|
||||
|
@ -4839,6 +5039,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMSUB213PSZrkz, X86::VFNMSUB213PSZmkz, 0 },
|
||||
{ X86::VFNMSUB213SDZr_Intk, X86::VFNMSUB213SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SDZr_Intkz, X86::VFNMSUB213SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SHZr_Intk, X86::VFNMSUB213SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SHZr_Intkz, X86::VFNMSUB213SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SSZr_Intk, X86::VFNMSUB213SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB213SSZr_Intkz, X86::VFNMSUB213SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mk, 0 },
|
||||
|
@ -4847,6 +5049,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMSUB231PDZ256rkz, X86::VFNMSUB231PDZ256mkz, 0 },
|
||||
{ X86::VFNMSUB231PDZrk, X86::VFNMSUB231PDZmk, 0 },
|
||||
{ X86::VFNMSUB231PDZrkz, X86::VFNMSUB231PDZmkz, 0 },
|
||||
{ X86::VFNMSUB231PHZ128rk, X86::VFNMSUB231PHZ128mk, 0 },
|
||||
{ X86::VFNMSUB231PHZ128rkz, X86::VFNMSUB231PHZ128mkz, 0 },
|
||||
{ X86::VFNMSUB231PHZ256rk, X86::VFNMSUB231PHZ256mk, 0 },
|
||||
{ X86::VFNMSUB231PHZ256rkz, X86::VFNMSUB231PHZ256mkz, 0 },
|
||||
{ X86::VFNMSUB231PHZrk, X86::VFNMSUB231PHZmk, 0 },
|
||||
{ X86::VFNMSUB231PHZrkz, X86::VFNMSUB231PHZmkz, 0 },
|
||||
{ X86::VFNMSUB231PSZ128rk, X86::VFNMSUB231PSZ128mk, 0 },
|
||||
{ X86::VFNMSUB231PSZ128rkz, X86::VFNMSUB231PSZ128mkz, 0 },
|
||||
{ X86::VFNMSUB231PSZ256rk, X86::VFNMSUB231PSZ256mk, 0 },
|
||||
|
@ -4855,6 +5063,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
|
|||
{ X86::VFNMSUB231PSZrkz, X86::VFNMSUB231PSZmkz, 0 },
|
||||
{ X86::VFNMSUB231SDZr_Intk, X86::VFNMSUB231SDZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SDZr_Intkz, X86::VFNMSUB231SDZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SHZr_Intk, X86::VFNMSUB231SHZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SHZr_Intkz, X86::VFNMSUB231SHZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SSZr_Intk, X86::VFNMSUB231SSZm_Intk, TB_NO_REVERSE },
|
||||
{ X86::VFNMSUB231SSZr_Intkz, X86::VFNMSUB231SSZm_Intkz, TB_NO_REVERSE },
|
||||
{ X86::VGETEXPSDZrk, X86::VGETEXPSDZmk, TB_NO_REVERSE },
|
||||
|
|
|
@ -882,7 +882,6 @@ class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
|
|||
list<dag>pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, T8PD,
|
||||
EVEX_4V, Requires<[HasAVX512]>;
|
||||
class AVX512FMA3Base : T8PD, EVEX_4V;
|
||||
|
||||
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag>pattern>
|
||||
|
|
|
@ -6137,6 +6137,24 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
|
|||
case X86::VMINSHZrr_Intk: case X86::VMINSHZrr_Intkz:
|
||||
case X86::VMULSHZrr_Intk: case X86::VMULSHZrr_Intkz:
|
||||
case X86::VSUBSHZrr_Intk: case X86::VSUBSHZrr_Intkz:
|
||||
case X86::VFMADD132SHZr_Int: case X86::VFNMADD132SHZr_Int:
|
||||
case X86::VFMADD213SHZr_Int: case X86::VFNMADD213SHZr_Int:
|
||||
case X86::VFMADD231SHZr_Int: case X86::VFNMADD231SHZr_Int:
|
||||
case X86::VFMSUB132SHZr_Int: case X86::VFNMSUB132SHZr_Int:
|
||||
case X86::VFMSUB213SHZr_Int: case X86::VFNMSUB213SHZr_Int:
|
||||
case X86::VFMSUB231SHZr_Int: case X86::VFNMSUB231SHZr_Int:
|
||||
case X86::VFMADD132SHZr_Intk: case X86::VFNMADD132SHZr_Intk:
|
||||
case X86::VFMADD213SHZr_Intk: case X86::VFNMADD213SHZr_Intk:
|
||||
case X86::VFMADD231SHZr_Intk: case X86::VFNMADD231SHZr_Intk:
|
||||
case X86::VFMSUB132SHZr_Intk: case X86::VFNMSUB132SHZr_Intk:
|
||||
case X86::VFMSUB213SHZr_Intk: case X86::VFNMSUB213SHZr_Intk:
|
||||
case X86::VFMSUB231SHZr_Intk: case X86::VFNMSUB231SHZr_Intk:
|
||||
case X86::VFMADD132SHZr_Intkz: case X86::VFNMADD132SHZr_Intkz:
|
||||
case X86::VFMADD213SHZr_Intkz: case X86::VFNMADD213SHZr_Intkz:
|
||||
case X86::VFMADD231SHZr_Intkz: case X86::VFNMADD231SHZr_Intkz:
|
||||
case X86::VFMSUB132SHZr_Intkz: case X86::VFNMSUB132SHZr_Intkz:
|
||||
case X86::VFMSUB213SHZr_Intkz: case X86::VFNMSUB213SHZr_Intkz:
|
||||
case X86::VFMSUB231SHZr_Intkz: case X86::VFNMSUB231SHZr_Intkz:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
|
|
|
@ -1187,6 +1187,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
|
||||
X86_INTRINSIC_DATA(avx512fp16_vcvtusi642sh, INTR_TYPE_2OP,
|
||||
X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
|
||||
X86_INTRINSIC_DATA(avx512fp16_vfmadd_f16, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512fp16_vfmadd_ph_512, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_128, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_512, INTR_TYPE_3OP, X86ISD::FMADDSUB,
|
||||
X86ISD::FMADDSUB_RND),
|
||||
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
|
||||
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
|
||||
X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,585 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
|
||||
|
||||
|
||||
declare <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half>, <32 x half>, <32 x half>, i32)
|
||||
|
||||
define <32 x half> @test_x86_vfnmadd_ph_z(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_ph_z:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmadd213ph %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x48,0xac,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = call <32 x half> @llvm.fma.v32f16(<32 x half> %a0, <32 x half> %1, <32 x half> %a2)
|
||||
ret <32 x half> %2
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_vfnmadd_ph(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 %mask) {
|
||||
; X86-LABEL: test_mask_vfnmadd_ph:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmadd132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x9c,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_vfnmadd_ph:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmadd132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x9c,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = call <32 x half> @llvm.fma.v32f16(<32 x half> %a0, <32 x half> %1, <32 x half> %a2)
|
||||
%3 = bitcast i32 %mask to <32 x i1>
|
||||
%4 = select <32 x i1> %3, <32 x half> %2, <32 x half> %a0
|
||||
ret <32 x half> %4
|
||||
}
|
||||
|
||||
define <32 x half> @test_x86_vfnmsubph_z(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubph_z:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsub213ph %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x48,0xae,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a2
|
||||
%3 = call <32 x half> @llvm.fma.v32f16(<32 x half> %a0, <32 x half> %1, <32 x half> %2)
|
||||
ret <32 x half> %3
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_vfnmsub_ph(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 %mask) {
|
||||
; X86-LABEL: test_mask_vfnmsub_ph:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x9e,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_vfnmsub_ph:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x9e,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a2
|
||||
%3 = call <32 x half> @llvm.fma.v32f16(<32 x half> %a0, <32 x half> %1, <32 x half> %2)
|
||||
%4 = bitcast i32 %mask to <32 x i1>
|
||||
%5 = select <32 x i1> %4, <32 x half> %3, <32 x half> %a0
|
||||
ret <32 x half> %5
|
||||
}
|
||||
|
||||
define <32 x half> @test_x86_vfmaddsubph_z(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmaddsubph_z:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsub213ph %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x48,0xa6,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 4) #2
|
||||
ret <32 x half> %res
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_fmaddsub_ph(<32 x half> %a, <32 x half> %b, <32 x half> %c, i32 %mask) {
|
||||
; X86-LABEL: test_mask_fmaddsub_ph:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmaddsub132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x96,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_fmaddsub_ph:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmaddsub132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x96,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %a, <32 x half> %b, <32 x half> %c, i32 4)
|
||||
%bc = bitcast i32 %mask to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %a
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
declare <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half>, <32 x half>, <32 x half>, i32) nounwind readnone
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask_vfmaddsub_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask_vfmaddsub_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmaddsub132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x96,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_vfmaddsub_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmaddsub132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x96,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 4)
|
||||
%bc = bitcast i32 %x3 to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %x0
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask3_vfmaddsub_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmaddsub231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xb6,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmaddsub231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xb6,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 4)
|
||||
%bc = bitcast i32 %x3 to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %x2
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_maskz_vfmaddsub_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmaddsub213ph %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0xc9,0xa6,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmaddsub213ph %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0xc9,0xa6,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 4)
|
||||
%bc = bitcast i32 %x3 to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> zeroinitializer
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask3_vfmsubadd_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmsubadd231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xb7,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmsubadd231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xb7,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%neg = fneg <32 x half> %x2
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %neg, i32 4)
|
||||
%bc = bitcast i32 %x3 to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %x2
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrb_rne(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 %mask) {
|
||||
; X86-LABEL: test_mask_round_vfmadd512_ph_rrb_rne:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd132ph {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x19,0x98,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_round_vfmadd512_ph_rrb_rne:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd132ph {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x19,0x98,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 8) nounwind
|
||||
%bc = bitcast i32 %mask to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %a0
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrb_rtn(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 %mask) {
|
||||
; X86-LABEL: test_mask_round_vfmadd512_ph_rrb_rtn:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd132ph {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x39,0x98,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_round_vfmadd512_ph_rrb_rtn:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd132ph {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x39,0x98,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 9) nounwind
|
||||
%bc = bitcast i32 %mask to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %a0
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrb_rtp(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 %mask) {
|
||||
; X86-LABEL: test_mask_round_vfmadd512_ph_rrb_rtp:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd132ph {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x59,0x98,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_round_vfmadd512_ph_rrb_rtp:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd132ph {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x59,0x98,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 10) nounwind
|
||||
%bc = bitcast i32 %mask to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %a0
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrb_rtz(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 %mask) {
|
||||
; X86-LABEL: test_mask_round_vfmadd512_ph_rrb_rtz:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd132ph {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x79,0x98,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_round_vfmadd512_ph_rrb_rtz:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd132ph {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x79,0x98,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 11) nounwind
|
||||
%bc = bitcast i32 %mask to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %a0
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrb_current(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 %mask) {
|
||||
; X86-LABEL: test_mask_round_vfmadd512_ph_rrb_current:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x98,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_round_vfmadd512_ph_rrb_current:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x98,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 4) nounwind
|
||||
%bc = bitcast i32 %mask to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %a0
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrbz_rne(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ph_rrbz_rne:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ph {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x18,0xa8,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 8) nounwind
|
||||
ret <32 x half> %res
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrbz_rtn(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ph_rrbz_rtn:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ph {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x38,0xa8,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 9) nounwind
|
||||
ret <32 x half> %res
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrbz_rtp(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ph_rrbz_rtp:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ph {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x58,0xa8,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 10) nounwind
|
||||
ret <32 x half> %res
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrbz_rtz(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ph_rrbz_rtz:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ph {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x78,0xa8,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 11) nounwind
|
||||
ret <32 x half> %res
|
||||
}
|
||||
|
||||
define <32 x half> @test_mask_round_vfmadd512_ph_rrbz_current(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) {
|
||||
; CHECK-LABEL: test_mask_round_vfmadd512_ph_rrbz_current:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x48,0xa8,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2, i32 4) nounwind
|
||||
ret <32 x half> %res
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask3_vfmsub_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmsub231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xba,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmsub231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xba,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%2 = call <32 x half> @llvm.fma.v32f16(<32 x half> %x0, <32 x half> %x1, <32 x half> %1)
|
||||
%3 = bitcast i32 %x3 to <32 x i1>
|
||||
%4 = select <32 x i1> %3, <32 x half> %2, <32 x half> %x2
|
||||
ret <32 x half> %4
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask_vfmadd_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x98,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x98,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x half> @llvm.x86.avx512fp16.vfmadd.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 4)
|
||||
%bc = bitcast i32 %x3 to <32 x i1>
|
||||
%sel = select <32 x i1> %bc, <32 x half> %res, <32 x half> %x0
|
||||
ret <32 x half> %sel
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask3_vfmadd_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xb8,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xb8,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <32 x half> @llvm.fma.v32f16(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %x2
|
||||
ret <32 x half> %3
|
||||
}
|
||||
|
||||
define <32 x half> @test_int_x86_avx512_maskz_vfmadd_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0xc9,0xa8,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0xc9,0xa8,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <32 x half> @llvm.fma.v32f16(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
|
||||
ret <32 x half> %3
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask_vfnmsub_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x9e,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x9e,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x1
|
||||
%2 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%3 = call <32 x half> @llvm.fma.v32f16(<32 x half> %x0, <32 x half> %1, <32 x half> %2)
|
||||
%4 = bitcast i32 %x3 to <32 x i1>
|
||||
%5 = select <32 x i1> %4, <32 x half> %3, <32 x half> %x0
|
||||
ret <32 x half> %5
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask3_vfnmsub_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xbe,0xd1]
|
||||
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub231ph %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x49,0xbe,0xd1]
|
||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x0
|
||||
%2 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%3 = call <32 x half> @llvm.fma.v32f16(<32 x half> %1, <32 x half> %x1, <32 x half> %2)
|
||||
%4 = bitcast i32 %x3 to <32 x i1>
|
||||
%5 = select <32 x i1> %4, <32 x half> %3, <32 x half> %x2
|
||||
ret <32 x half> %5
|
||||
}
|
||||
|
||||
define <32 x half>@test_int_x86_avx512_mask_vfnmadd_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_ph_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmadd132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x9c,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_ph_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmadd132ph %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x49,0x9c,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x1
|
||||
%2 = call <32 x half> @llvm.fma.v32f16(<32 x half> %x0, <32 x half> %1, <32 x half> %x2)
|
||||
%3 = bitcast i32 %x3 to <32 x i1>
|
||||
%4 = select <32 x i1> %3, <32 x half> %2, <32 x half> %x0
|
||||
ret <32 x half> %4
|
||||
}
|
||||
|
||||
define <32 x half> @test_x86_fma_vfnmadd_ph_512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_ph_512:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmadd213ph %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x48,0xac,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%2 = call <32 x half> @llvm.fma.v32f16(<32 x half> %1, <32 x half> %a1, <32 x half> %a2)
|
||||
ret <32 x half> %2
|
||||
}
|
||||
|
||||
define <32 x half> @test_x86_fma_vfnmsub_ph_512(<32 x half> %a0, <32 x half> %a1, <32 x half> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_ph_512:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsub213ph %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x75,0x48,0xae,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%2 = fsub <32 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%3 = call <32 x half> @llvm.fma.v32f16(<32 x half> %1, <32 x half> %a1, <32 x half> %2)
|
||||
ret <32 x half> %3
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_mask3_vfmadd_sh(<8 x half> %x0, <8 x half> %x1, half *%ptr_b, i8 %x3, i32 %x4) {
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sh:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vfmadd231sh (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xb9,0x08]
|
||||
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_sh:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vfmadd231sh (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xb9,0x0f]
|
||||
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%q = load half, half* %ptr_b
|
||||
%vecinit.i = insertelement <8 x half> undef, half %q, i32 0
|
||||
%1 = extractelement <8 x half> %x0, i64 0
|
||||
%2 = extractelement <8 x half> %vecinit.i, i64 0
|
||||
%3 = extractelement <8 x half> %x1, i64 0
|
||||
%4 = call half @llvm.fma.f16(half %1, half %2, half %3)
|
||||
%5 = bitcast i8 %x3 to <8 x i1>
|
||||
%6 = extractelement <8 x i1> %5, i64 0
|
||||
%7 = select i1 %6, half %4, half %3
|
||||
%8 = insertelement <8 x half> %x1, half %7, i64 0
|
||||
ret <8 x half> %8
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_maskz_vfmadd_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3, i32 %x4 ){
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sh:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd213sh %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0x89,0xa9,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_sh:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd213sh %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0x89,0xa9,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = extractelement <8 x half> %x0, i64 0
|
||||
%2 = extractelement <8 x half> %x1, i64 0
|
||||
%3 = extractelement <8 x half> %x2, i64 0
|
||||
%4 = call half @llvm.fma.f16(half %1, half %2, half %3)
|
||||
%5 = bitcast i8 %x3 to <8 x i1>
|
||||
%6 = extractelement <8 x i1> %5, i64 0
|
||||
%7 = select i1 %6, half %4, half 0.000000e+00
|
||||
%8 = insertelement <8 x half> %x0, half %7, i64 0
|
||||
%9 = extractelement <8 x half> %x0, i64 0
|
||||
%10 = extractelement <8 x half> %x1, i64 0
|
||||
%11 = extractelement <8 x half> %x2, i64 0
|
||||
%12 = call half @llvm.x86.avx512fp16.vfmadd.f16(half %9, half %10, half %11, i32 3)
|
||||
%13 = bitcast i8 %x3 to <8 x i1>
|
||||
%14 = extractelement <8 x i1> %13, i64 0
|
||||
%15 = select i1 %14, half %12, half 0.000000e+00
|
||||
%16 = insertelement <8 x half> %x0, half %15, i64 0
|
||||
%res2 = fadd <8 x half> %8, %16
|
||||
ret <8 x half> %8
|
||||
}
|
||||
|
||||
define void @fmadd_sh_mask_memfold(half* %a, half* %b, i8 %c) {
|
||||
; X86-LABEL: fmadd_sh_mask_memfold:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x0c]
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vmovsh (%ecx), %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x10,0x01]
|
||||
; X86-NEXT: vmovsh (%eax), %xmm1 # encoding: [0x62,0xf5,0x7e,0x08,0x10,0x08]
|
||||
; X86-NEXT: vfmadd213sh %xmm0, %xmm0, %xmm1 # encoding: [0x62,0xf6,0x7d,0x08,0xa9,0xc8]
|
||||
; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x10,0xc1]
|
||||
; X86-NEXT: vmovsh %xmm0, (%ecx) # encoding: [0x62,0xf5,0x7e,0x08,0x11,0x01]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: fmadd_sh_mask_memfold:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vmovsh (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x10,0x07]
|
||||
; X64-NEXT: vmovsh (%rsi), %xmm1 # encoding: [0x62,0xf5,0x7e,0x08,0x10,0x0e]
|
||||
; X64-NEXT: vfmadd213sh %xmm0, %xmm0, %xmm1 # encoding: [0x62,0xf6,0x7d,0x08,0xa9,0xc8]
|
||||
; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
|
||||
; X64-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x10,0xc1]
|
||||
; X64-NEXT: vmovsh %xmm0, (%rdi) # encoding: [0x62,0xf5,0x7e,0x08,0x11,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%a.val = load half, half* %a
|
||||
%av0 = insertelement <8 x half> undef, half %a.val, i32 0
|
||||
%av1 = insertelement <8 x half> %av0, half 0.000000e+00, i32 1
|
||||
%av2 = insertelement <8 x half> %av1, half 0.000000e+00, i32 2
|
||||
%av3 = insertelement <8 x half> %av2, half 0.000000e+00, i32 3
|
||||
%av4 = insertelement <8 x half> %av3, half 0.000000e+00, i32 4
|
||||
%av5 = insertelement <8 x half> %av4, half 0.000000e+00, i32 5
|
||||
%av6 = insertelement <8 x half> %av5, half 0.000000e+00, i32 6
|
||||
%av = insertelement <8 x half> %av6, half 0.000000e+00, i32 7
|
||||
|
||||
%b.val = load half, half* %b
|
||||
%bv0 = insertelement <8 x half> undef, half %b.val, i32 0
|
||||
%bv1 = insertelement <8 x half> %bv0, half 0.000000e+00, i32 1
|
||||
%bv2 = insertelement <8 x half> %bv1, half 0.000000e+00, i32 2
|
||||
%bv3 = insertelement <8 x half> %bv2, half 0.000000e+00, i32 3
|
||||
%bv4 = insertelement <8 x half> %bv3, half 0.000000e+00, i32 4
|
||||
%bv5 = insertelement <8 x half> %bv4, half 0.000000e+00, i32 5
|
||||
%bv6 = insertelement <8 x half> %bv5, half 0.000000e+00, i32 6
|
||||
%bv = insertelement <8 x half> %bv6, half 0.000000e+00, i32 7
|
||||
%1 = extractelement <8 x half> %av, i64 0
|
||||
%2 = extractelement <8 x half> %bv, i64 0
|
||||
%3 = extractelement <8 x half> %av, i64 0
|
||||
%4 = call half @llvm.fma.f16(half %1, half %2, half %3)
|
||||
%5 = bitcast i8 %c to <8 x i1>
|
||||
%6 = extractelement <8 x i1> %5, i64 0
|
||||
%7 = select i1 %6, half %4, half %1
|
||||
%8 = insertelement <8 x half> %av, half %7, i64 0
|
||||
%sr = extractelement <8 x half> %8, i32 0
|
||||
store half %sr, half* %a
|
||||
ret void
|
||||
}
|
||||
|
||||
declare half @llvm.fma.f16(half, half, half)
|
||||
declare half @llvm.x86.avx512fp16.vfmadd.f16(half, half, half, i32)
|
||||
|
||||
declare <32 x half> @llvm.fma.v32f16(<32 x half>, <32 x half>, <32 x half>)
|
|
@ -0,0 +1,530 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -mattr=+avx512fp16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -mattr=+avx512fp16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
|
||||
|
||||
|
||||
define <16 x half> @test_x86_vfnmadd_ph_z_256(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_ph_z_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmadd213ph %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x75,0x28,0xac,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %1, <16 x half> %a2)
|
||||
ret <16 x half> %2
|
||||
}
|
||||
|
||||
define <16 x half> @test_mask_vfnmadd_ph_256(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, i16 %mask) {
|
||||
; X86-LABEL: test_mask_vfnmadd_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmadd132ph %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x29,0x9c,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_vfnmadd_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmadd132ph %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x29,0x9c,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %1, <16 x half> %a2)
|
||||
%3 = bitcast i16 %mask to <16 x i1>
|
||||
%4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %a0
|
||||
ret <16 x half> %4
|
||||
}
|
||||
|
||||
define <16 x half> @test_x86_vfnmsubph_z_256(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubph_z_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsub213ph %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x75,0x28,0xae,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a2
|
||||
%3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %1, <16 x half> %2)
|
||||
ret <16 x half> %3
|
||||
}
|
||||
|
||||
define <16 x half> @test_mask_vfnmsub_ph_256(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2, i16 %mask) {
|
||||
; X86-LABEL: test_mask_vfnmsub_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub132ph %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x29,0x9e,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_vfnmsub_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub132ph %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x29,0x9e,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a2
|
||||
%3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %a0, <16 x half> %1, <16 x half> %2)
|
||||
%4 = bitcast i16 %mask to <16 x i1>
|
||||
%5 = select <16 x i1> %4, <16 x half> %3, <16 x half> %a0
|
||||
ret <16 x half> %5
|
||||
}
|
||||
|
||||
define <16 x half>@test_int_x86_avx512_mask3_vfmaddsub_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmaddsub231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xb6,0xd1]
|
||||
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmaddsub231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xb6,0xd1]
|
||||
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2)
|
||||
%bc = bitcast i16 %x3 to <16 x i1>
|
||||
%sel = select <16 x i1> %bc, <16 x half> %res, <16 x half> %x2
|
||||
ret <16 x half> %sel
|
||||
}
|
||||
declare <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half>, <16 x half>, <16 x half>)
|
||||
|
||||
define <16 x half>@test_int_x86_avx512_maskz_vfmaddsub_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmaddsub213ph %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0xa9,0xa6,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmaddsub213ph %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0xa9,0xa6,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2)
|
||||
%bc = bitcast i16 %x3 to <16 x i1>
|
||||
%sel = select <16 x i1> %bc, <16 x half> %res, <16 x half> zeroinitializer
|
||||
ret <16 x half> %sel
|
||||
}
|
||||
|
||||
define <16 x half>@test_int_x86_avx512_mask3_vfmsubadd_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmsubadd231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xb7,0xd1]
|
||||
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmsubadd231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xb7,0xd1]
|
||||
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%neg = fneg <16 x half> %x2
|
||||
%res = call <16 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> %neg)
|
||||
%bc = bitcast i16 %x3 to <16 x i1>
|
||||
%sel = select <16 x i1> %bc, <16 x half> %res, <16 x half> %x2
|
||||
ret <16 x half> %sel
|
||||
}
|
||||
|
||||
define <16 x half>@test_int_x86_avx512_mask3_vfmsub_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmsub231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xba,0xd1]
|
||||
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmsub231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xba,0xd1]
|
||||
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %x0, <16 x half> %x1, <16 x half> %1)
|
||||
%3 = bitcast i16 %x3 to <16 x i1>
|
||||
%4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %x2
|
||||
ret <16 x half> %4
|
||||
}
|
||||
|
||||
define <16 x half>@test_int_x86_avx512_mask3_vfmadd_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xb8,0xd1]
|
||||
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xb8,0xd1]
|
||||
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <16 x half> @llvm.fma.v16f16(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x half> %1, <16 x half> %x2
|
||||
ret <16 x half> %3
|
||||
}
|
||||
|
||||
define <16 x half> @test_int_x86_avx512_maskz_vfmadd_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd213ph %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0xa9,0xa8,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd213ph %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0xa9,0xa8,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <16 x half> @llvm.fma.v16f16(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x half> %1, <16 x half> zeroinitializer
|
||||
ret <16 x half> %3
|
||||
}
|
||||
|
||||
define <16 x half>@test_int_x86_avx512_mask_vfnmsub_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub132ph %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x29,0x9e,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub132ph %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x29,0x9e,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x1
|
||||
%2 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %x0, <16 x half> %1, <16 x half> %2)
|
||||
%4 = bitcast i16 %x3 to <16 x i1>
|
||||
%5 = select <16 x i1> %4, <16 x half> %3, <16 x half> %x0
|
||||
ret <16 x half> %5
|
||||
}
|
||||
|
||||
define <16 x half>@test_int_x86_avx512_mask3_vfnmsub_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xbe,0xd1]
|
||||
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub231ph %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x29,0xbe,0xd1]
|
||||
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x0
|
||||
%2 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %1, <16 x half> %x1, <16 x half> %2)
|
||||
%4 = bitcast i16 %x3 to <16 x i1>
|
||||
%5 = select <16 x i1> %4, <16 x half> %3, <16 x half> %x2
|
||||
ret <16 x half> %5
|
||||
}
|
||||
|
||||
define <16 x half>@test_int_x86_avx512_mask_vfnmadd_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_ph_256:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmadd132ph %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x29,0x9c,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmadd132ph %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x29,0x9c,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x1
|
||||
%2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %x0, <16 x half> %1, <16 x half> %x2)
|
||||
%3 = bitcast i16 %x3 to <16 x i1>
|
||||
%4 = select <16 x i1> %3, <16 x half> %2, <16 x half> %x0
|
||||
ret <16 x half> %4
|
||||
}
|
||||
|
||||
define <16 x half> @test_x86_fma_vfnmadd_ph_256(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_ph_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmadd213ph %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x75,0x28,0xac,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%2 = call <16 x half> @llvm.fma.v16f16(<16 x half> %1, <16 x half> %a1, <16 x half> %a2)
|
||||
ret <16 x half> %2
|
||||
}
|
||||
|
||||
define <16 x half> @test_x86_fma_vfnmsub_ph_256(<16 x half> %a0, <16 x half> %a1, <16 x half> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_ph_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsub213ph %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x75,0x28,0xae,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%2 = fsub <16 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%3 = call <16 x half> @llvm.fma.v16f16(<16 x half> %1, <16 x half> %a1, <16 x half> %2)
|
||||
ret <16 x half> %3
|
||||
}
|
||||
|
||||
declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
|
||||
|
||||
define <8 x half> @test_x86_vfnmadd_ph_z_128(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_ph_z_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmadd213ph %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x75,0x08,0xac,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %1, <8 x half> %a2)
|
||||
ret <8 x half> %2
|
||||
}
|
||||
|
||||
define <8 x half> @test_mask_vfnmadd_ph_128(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
|
||||
; X86-LABEL: test_mask_vfnmadd_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmadd132ph %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x09,0x9c,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_vfnmadd_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmadd132ph %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x09,0x9c,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %1, <8 x half> %a2)
|
||||
%3 = bitcast i8 %mask to <8 x i1>
|
||||
%4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %a0
|
||||
ret <8 x half> %4
|
||||
}
|
||||
|
||||
define <8 x half> @test_x86_vfnmsubph_z_128(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubph_z_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsub213ph %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x75,0x08,0xae,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a2
|
||||
%3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %1, <8 x half> %2)
|
||||
ret <8 x half> %3
|
||||
}
|
||||
|
||||
define <8 x half> @test_mask_vfnmsub_ph_128(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
|
||||
; X86-LABEL: test_mask_vfnmsub_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub132ph %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x09,0x9e,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_vfnmsub_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub132ph %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x09,0x9e,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a1
|
||||
%2 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a2
|
||||
%3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %a0, <8 x half> %1, <8 x half> %2)
|
||||
%4 = bitcast i8 %mask to <8 x i1>
|
||||
%5 = select <8 x i1> %4, <8 x half> %3, <8 x half> %a0
|
||||
ret <8 x half> %5
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_mask3_vfmaddsub_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmaddsub231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xb6,0xd1]
|
||||
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmaddsub231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xb6,0xd1]
|
||||
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2)
|
||||
%bc = bitcast i8 %x3 to <8 x i1>
|
||||
%sel = select <8 x i1> %bc, <8 x half> %res, <8 x half> %x2
|
||||
ret <8 x half> %sel
|
||||
}
|
||||
declare <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half>, <8 x half>, <8 x half>)
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_maskz_vfmaddsub_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmaddsub213ph %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0x89,0xa6,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmaddsub213ph %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0x89,0xa6,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2)
|
||||
%bc = bitcast i8 %x3 to <8 x i1>
|
||||
%sel = select <8 x i1> %bc, <8 x half> %res, <8 x half> zeroinitializer
|
||||
ret <8 x half> %sel
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_mask3_vfmsubadd_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmsubadd231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xb7,0xd1]
|
||||
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmsubadd231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xb7,0xd1]
|
||||
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%neg = fneg <8 x half> %x2
|
||||
%res = call <8 x half> @llvm.x86.avx512fp16.vfmaddsub.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> %neg)
|
||||
%bc = bitcast i8 %x3 to <8 x i1>
|
||||
%sel = select <8 x i1> %bc, <8 x half> %res, <8 x half> %x2
|
||||
ret <8 x half> %sel
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_mask3_vfmsub_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmsub231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xba,0xd1]
|
||||
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmsub231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xba,0xd1]
|
||||
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %1)
|
||||
%3 = bitcast i8 %x3 to <8 x i1>
|
||||
%4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %x2
|
||||
ret <8 x half> %4
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_mask3_vfmadd_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xb8,0xd1]
|
||||
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xb8,0xd1]
|
||||
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <8 x half> @llvm.fma.v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x half> %1, <8 x half> %x2
|
||||
ret <8 x half> %3
|
||||
}
|
||||
|
||||
define <8 x half> @test_int_x86_avx512_maskz_vfmadd_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3) {
|
||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfmadd213ph %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0x89,0xa8,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfmadd213ph %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x75,0x89,0xa8,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <8 x half> @llvm.fma.v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x half> %1, <8 x half> zeroinitializer
|
||||
ret <8 x half> %3
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_mask_vfnmsub_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub132ph %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x09,0x9e,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub132ph %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x09,0x9e,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x1
|
||||
%2 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %x0, <8 x half> %1, <8 x half> %2)
|
||||
%4 = bitcast i8 %x3 to <8 x i1>
|
||||
%5 = select <8 x i1> %4, <8 x half> %3, <8 x half> %x0
|
||||
ret <8 x half> %5
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_mask3_vfnmsub_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmsub231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xbe,0xd1]
|
||||
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmsub231ph %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7d,0x09,0xbe,0xd1]
|
||||
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x0
|
||||
%2 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x2
|
||||
%3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %1, <8 x half> %x1, <8 x half> %2)
|
||||
%4 = bitcast i8 %x3 to <8 x i1>
|
||||
%5 = select <8 x i1> %4, <8 x half> %3, <8 x half> %x2
|
||||
ret <8 x half> %5
|
||||
}
|
||||
|
||||
define <8 x half>@test_int_x86_avx512_mask_vfnmadd_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3){
|
||||
; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_ph_128:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vfnmadd132ph %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x09,0x9c,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vfnmadd132ph %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6d,0x09,0x9c,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %x1
|
||||
%2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %x0, <8 x half> %1, <8 x half> %x2)
|
||||
%3 = bitcast i8 %x3 to <8 x i1>
|
||||
%4 = select <8 x i1> %3, <8 x half> %2, <8 x half> %x0
|
||||
ret <8 x half> %4
|
||||
}
|
||||
|
||||
define <8 x half> @test_x86_fma_vfnmadd_ph_128(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_ph_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmadd213ph %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x75,0x08,0xac,0xc2]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%2 = call <8 x half> @llvm.fma.v8f16(<8 x half> %1, <8 x half> %a1, <8 x half> %a2)
|
||||
ret <8 x half> %2
|
||||
}
|
||||
|
||||
define <8 x half> @test_x86_fma_vfnmsub_ph_128(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) #0 {
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_ph_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsub213ph %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x75,0x08,0xae,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%1 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%2 = fsub <8 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %a0
|
||||
%3 = call <8 x half> @llvm.fma.v8f16(<8 x half> %1, <8 x half> %a1, <8 x half> %2)
|
||||
ret <8 x half> %3
|
||||
}
|
||||
|
||||
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
|
|
@ -11,6 +11,7 @@ declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
|
|||
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
|
||||
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
|
||||
declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata)
|
||||
declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata)
|
||||
|
||||
define half @fadd_f16(half %a, half %b) nounwind strictfp {
|
||||
; X86-LABEL: fadd_f16:
|
||||
|
@ -197,4 +198,22 @@ define void @fsqrt_f16(half* %a) nounwind strictfp {
|
|||
ret void
|
||||
}
|
||||
|
||||
define half @fma_f16(half %a, half %b, half %c) nounwind strictfp {
|
||||
; X86-LABEL: fma_f16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm1
|
||||
; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
|
||||
; X86-NEXT: vfmadd213sh {{[0-9]+}}(%esp), %xmm1, %xmm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fma_f16:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vfmadd213sh %xmm2, %xmm1, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%res = call half @llvm.experimental.constrained.fma.f16(half %a, half %b, half %c,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret half %res
|
||||
}
|
||||
|
||||
attributes #0 = { strictfp }
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -17,6 +17,7 @@ declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
|
|||
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata)
|
||||
declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata)
|
||||
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata)
|
||||
declare <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, metadata, metadata)
|
||||
|
||||
define <8 x half> @f2(<8 x half> %a, <8 x half> %b) #0 {
|
||||
; CHECK-LABEL: f2:
|
||||
|
@ -101,6 +102,17 @@ define <2 x double> @f12(<2 x double> %a0, <8 x half> %a1) #0 {
|
|||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <8 x half> @f13(<8 x half> %a, <8 x half> %b, <8 x half> %c) #0 {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ph %xmm2, %xmm1, %xmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
define <2 x double> @f15(<2 x half> %a) #0 {
|
||||
; CHECK-LABEL: f15:
|
||||
; CHECK: # %bb.0:
|
||||
|
|
|
@ -7,6 +7,7 @@ declare <16 x half> @llvm.experimental.constrained.fsub.v16f16(<16 x half>, <16
|
|||
declare <16 x half> @llvm.experimental.constrained.fmul.v16f16(<16 x half>, <16 x half>, metadata, metadata)
|
||||
declare <16 x half> @llvm.experimental.constrained.fdiv.v16f16(<16 x half>, <16 x half>, metadata, metadata)
|
||||
declare <16 x half> @llvm.experimental.constrained.sqrt.v16f16(<16 x half>, metadata, metadata)
|
||||
declare <16 x half> @llvm.experimental.constrained.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, metadata, metadata)
|
||||
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(<4 x half>, metadata)
|
||||
declare <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half>, metadata)
|
||||
declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(<4 x double>, metadata, metadata)
|
||||
|
@ -98,6 +99,17 @@ define <4 x half> @f12(<4 x double> %a) #0 {
|
|||
ret <4 x half> %ret
|
||||
}
|
||||
|
||||
define <16 x half> @f13(<16 x half> %a, <16 x half> %b, <16 x half> %c) #0 {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ph %ymm2, %ymm1, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <16 x half> @llvm.experimental.constrained.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret <16 x half> %res
|
||||
}
|
||||
|
||||
define <8 x float> @f14(<8 x half> %a) #0 {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: # %bb.0:
|
||||
|
|
|
@ -11,6 +11,7 @@ declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f16(<8 x half>
|
|||
declare <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(<16 x half>, metadata)
|
||||
declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(<8 x double>, metadata, metadata)
|
||||
declare <16 x half> @llvm.experimental.constrained.fptrunc.v16f16.v16f32(<16 x float>, metadata, metadata)
|
||||
declare <32 x half> @llvm.experimental.constrained.fma.v32f16(<32 x half>, <32 x half>, <32 x half>, metadata, metadata)
|
||||
declare <32 x half> @llvm.experimental.constrained.ceil.v32f16(<32 x half>, metadata)
|
||||
declare <32 x half> @llvm.experimental.constrained.floor.v32f16(<32 x half>, metadata)
|
||||
declare <32 x half> @llvm.experimental.constrained.trunc.v32f16(<32 x half>, metadata)
|
||||
|
@ -97,6 +98,17 @@ define <8 x half> @f12(<8 x double> %a) #0 {
|
|||
ret <8 x half> %ret
|
||||
}
|
||||
|
||||
define <32 x half> @f13(<32 x half> %a, <32 x half> %b, <32 x half> %c) #0 {
|
||||
; CHECK-LABEL: f13:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <32 x half> @llvm.experimental.constrained.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c,
|
||||
metadata !"round.dynamic",
|
||||
metadata !"fpexcept.strict") #0
|
||||
ret <32 x half> %res
|
||||
}
|
||||
|
||||
define <16 x float> @f14(<16 x half> %a) #0 {
|
||||
; CHECK-LABEL: f14:
|
||||
; CHECK: # %bb.0:
|
||||
|
|
|
@ -1764,3 +1764,723 @@
|
|||
# ATT: vsqrtsh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vsqrtsh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x65,0x16,0x87,0x51,0x72,0x80
|
||||
|
||||
# ATT: vfmadd132ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmadd132ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0x98,0xf4
|
||||
|
||||
# ATT: vfmadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmadd132ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x98,0xf4
|
||||
|
||||
# ATT: vfmadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmadd132ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmadd132ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0x98,0x31
|
||||
|
||||
# ATT: vfmadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmadd132ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0x98,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmadd132ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0x98,0x72,0x80
|
||||
|
||||
# ATT: vfmadd132sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmadd132sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0x99,0xf4
|
||||
|
||||
# ATT: vfmadd132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmadd132sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x99,0xf4
|
||||
|
||||
# ATT: vfmadd132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfmadd132sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0x99,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd132sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfmadd132sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0x99,0x31
|
||||
|
||||
# ATT: vfmadd132sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfmadd132sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0x99,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfmadd132sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0x99,0x72,0x80
|
||||
|
||||
# ATT: vfmadd213ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmadd213ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xa8,0xf4
|
||||
|
||||
# ATT: vfmadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmadd213ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xa8,0xf4
|
||||
|
||||
# ATT: vfmadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmadd213ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmadd213ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xa8,0x31
|
||||
|
||||
# ATT: vfmadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmadd213ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xa8,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmadd213ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xa8,0x72,0x80
|
||||
|
||||
# ATT: vfmadd213sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmadd213sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0xa9,0xf4
|
||||
|
||||
# ATT: vfmadd213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmadd213sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xa9,0xf4
|
||||
|
||||
# ATT: vfmadd213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfmadd213sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0xa9,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd213sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfmadd213sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0xa9,0x31
|
||||
|
||||
# ATT: vfmadd213sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfmadd213sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0xa9,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfmadd213sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0xa9,0x72,0x80
|
||||
|
||||
# ATT: vfmadd231ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmadd231ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xb8,0xf4
|
||||
|
||||
# ATT: vfmadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmadd231ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xb8,0xf4
|
||||
|
||||
# ATT: vfmadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmadd231ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmadd231ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xb8,0x31
|
||||
|
||||
# ATT: vfmadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmadd231ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xb8,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmadd231ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xb8,0x72,0x80
|
||||
|
||||
# ATT: vfmadd231sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmadd231sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0xb9,0xf4
|
||||
|
||||
# ATT: vfmadd231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmadd231sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xb9,0xf4
|
||||
|
||||
# ATT: vfmadd231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfmadd231sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0xb9,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd231sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfmadd231sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0xb9,0x31
|
||||
|
||||
# ATT: vfmadd231sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfmadd231sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0xb9,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfmadd231sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0xb9,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub132ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub132ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0x96,0xf4
|
||||
|
||||
# ATT: vfmaddsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub132ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x96,0xf4
|
||||
|
||||
# ATT: vfmaddsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmaddsub132ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0x96,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub132ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0x96,0x31
|
||||
|
||||
# ATT: vfmaddsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub132ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0x96,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmaddsub132ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0x96,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub213ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub213ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xa6,0xf4
|
||||
|
||||
# ATT: vfmaddsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub213ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xa6,0xf4
|
||||
|
||||
# ATT: vfmaddsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmaddsub213ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xa6,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub213ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xa6,0x31
|
||||
|
||||
# ATT: vfmaddsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub213ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xa6,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmaddsub213ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xa6,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub231ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub231ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xb6,0xf4
|
||||
|
||||
# ATT: vfmaddsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub231ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xb6,0xf4
|
||||
|
||||
# ATT: vfmaddsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmaddsub231ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xb6,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub231ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xb6,0x31
|
||||
|
||||
# ATT: vfmaddsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmaddsub231ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xb6,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmaddsub231ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xb6,0x72,0x80
|
||||
|
||||
# ATT: vfmsub132ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsub132ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0x9a,0xf4
|
||||
|
||||
# ATT: vfmsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsub132ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x9a,0xf4
|
||||
|
||||
# ATT: vfmsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmsub132ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmsub132ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0x9a,0x31
|
||||
|
||||
# ATT: vfmsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmsub132ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0x9a,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmsub132ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0x9a,0x72,0x80
|
||||
|
||||
# ATT: vfmsub132sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmsub132sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0x9b,0xf4
|
||||
|
||||
# ATT: vfmsub132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmsub132sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x9b,0xf4
|
||||
|
||||
# ATT: vfmsub132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfmsub132sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0x9b,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub132sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfmsub132sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0x9b,0x31
|
||||
|
||||
# ATT: vfmsub132sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfmsub132sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0x9b,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfmsub132sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0x9b,0x72,0x80
|
||||
|
||||
# ATT: vfmsub213ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsub213ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xaa,0xf4
|
||||
|
||||
# ATT: vfmsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsub213ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xaa,0xf4
|
||||
|
||||
# ATT: vfmsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmsub213ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmsub213ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xaa,0x31
|
||||
|
||||
# ATT: vfmsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmsub213ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xaa,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmsub213ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xaa,0x72,0x80
|
||||
|
||||
# ATT: vfmsub213sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmsub213sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0xab,0xf4
|
||||
|
||||
# ATT: vfmsub213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmsub213sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xab,0xf4
|
||||
|
||||
# ATT: vfmsub213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfmsub213sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0xab,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub213sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfmsub213sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0xab,0x31
|
||||
|
||||
# ATT: vfmsub213sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfmsub213sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0xab,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfmsub213sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0xab,0x72,0x80
|
||||
|
||||
# ATT: vfmsub231ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsub231ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xba,0xf4
|
||||
|
||||
# ATT: vfmsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsub231ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xba,0xf4
|
||||
|
||||
# ATT: vfmsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmsub231ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmsub231ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xba,0x31
|
||||
|
||||
# ATT: vfmsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmsub231ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xba,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmsub231ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xba,0x72,0x80
|
||||
|
||||
# ATT: vfmsub231sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmsub231sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0xbb,0xf4
|
||||
|
||||
# ATT: vfmsub231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfmsub231sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xbb,0xf4
|
||||
|
||||
# ATT: vfmsub231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfmsub231sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0xbb,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub231sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfmsub231sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0xbb,0x31
|
||||
|
||||
# ATT: vfmsub231sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfmsub231sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0xbb,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfmsub231sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0xbb,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd132ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd132ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0x97,0xf4
|
||||
|
||||
# ATT: vfmsubadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd132ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x97,0xf4
|
||||
|
||||
# ATT: vfmsubadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmsubadd132ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0x97,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd132ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0x97,0x31
|
||||
|
||||
# ATT: vfmsubadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd132ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0x97,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmsubadd132ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0x97,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd213ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd213ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xa7,0xf4
|
||||
|
||||
# ATT: vfmsubadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd213ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xa7,0xf4
|
||||
|
||||
# ATT: vfmsubadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmsubadd213ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xa7,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd213ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xa7,0x31
|
||||
|
||||
# ATT: vfmsubadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd213ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xa7,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmsubadd213ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xa7,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd231ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd231ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xb7,0xf4
|
||||
|
||||
# ATT: vfmsubadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd231ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xb7,0xf4
|
||||
|
||||
# ATT: vfmsubadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfmsubadd231ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xb7,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd231ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xb7,0x31
|
||||
|
||||
# ATT: vfmsubadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfmsubadd231ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xb7,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfmsubadd231ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xb7,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd132ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd132ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0x9c,0xf4
|
||||
|
||||
# ATT: vfnmadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd132ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x9c,0xf4
|
||||
|
||||
# ATT: vfnmadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfnmadd132ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd132ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0x9c,0x31
|
||||
|
||||
# ATT: vfnmadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfnmadd132ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0x9c,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfnmadd132ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0x9c,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd132sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmadd132sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0x9d,0xf4
|
||||
|
||||
# ATT: vfnmadd132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmadd132sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x9d,0xf4
|
||||
|
||||
# ATT: vfnmadd132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfnmadd132sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0x9d,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd132sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfnmadd132sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0x9d,0x31
|
||||
|
||||
# ATT: vfnmadd132sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfnmadd132sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0x9d,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfnmadd132sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0x9d,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd213ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd213ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xac,0xf4
|
||||
|
||||
# ATT: vfnmadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd213ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xac,0xf4
|
||||
|
||||
# ATT: vfnmadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfnmadd213ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd213ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xac,0x31
|
||||
|
||||
# ATT: vfnmadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfnmadd213ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xac,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfnmadd213ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xac,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd213sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmadd213sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0xad,0xf4
|
||||
|
||||
# ATT: vfnmadd213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmadd213sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xad,0xf4
|
||||
|
||||
# ATT: vfnmadd213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfnmadd213sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0xad,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd213sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfnmadd213sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0xad,0x31
|
||||
|
||||
# ATT: vfnmadd213sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfnmadd213sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0xad,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfnmadd213sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0xad,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd231ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd231ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xbc,0xf4
|
||||
|
||||
# ATT: vfnmadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd231ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xbc,0xf4
|
||||
|
||||
# ATT: vfnmadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfnmadd231ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfnmadd231ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xbc,0x31
|
||||
|
||||
# ATT: vfnmadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfnmadd231ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xbc,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfnmadd231ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xbc,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd231sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmadd231sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0xbd,0xf4
|
||||
|
||||
# ATT: vfnmadd231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmadd231sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xbd,0xf4
|
||||
|
||||
# ATT: vfnmadd231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfnmadd231sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0xbd,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd231sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfnmadd231sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0xbd,0x31
|
||||
|
||||
# ATT: vfnmadd231sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfnmadd231sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0xbd,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfnmadd231sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0xbd,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub132ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub132ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0x9e,0xf4
|
||||
|
||||
# ATT: vfnmsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub132ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x9e,0xf4
|
||||
|
||||
# ATT: vfnmsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfnmsub132ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub132ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0x9e,0x31
|
||||
|
||||
# ATT: vfnmsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfnmsub132ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0x9e,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfnmsub132ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0x9e,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub132sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmsub132sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0x9f,0xf4
|
||||
|
||||
# ATT: vfnmsub132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmsub132sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0x9f,0xf4
|
||||
|
||||
# ATT: vfnmsub132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfnmsub132sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0x9f,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub132sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfnmsub132sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0x9f,0x31
|
||||
|
||||
# ATT: vfnmsub132sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfnmsub132sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0x9f,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfnmsub132sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0x9f,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub213ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub213ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xae,0xf4
|
||||
|
||||
# ATT: vfnmsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub213ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xae,0xf4
|
||||
|
||||
# ATT: vfnmsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfnmsub213ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub213ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xae,0x31
|
||||
|
||||
# ATT: vfnmsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfnmsub213ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xae,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfnmsub213ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xae,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub213sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmsub213sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0xaf,0xf4
|
||||
|
||||
# ATT: vfnmsub213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmsub213sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xaf,0xf4
|
||||
|
||||
# ATT: vfnmsub213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfnmsub213sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0xaf,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub213sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfnmsub213sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0xaf,0x31
|
||||
|
||||
# ATT: vfnmsub213sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfnmsub213sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0xaf,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfnmsub213sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0xaf,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub231ph %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub231ph zmm30, zmm29, zmm28
|
||||
0x62,0x06,0x15,0x40,0xbe,0xf4
|
||||
|
||||
# ATT: vfnmsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub231ph zmm30, zmm29, zmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xbe,0xf4
|
||||
|
||||
# ATT: vfnmsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
# INTEL: vfnmsub231ph zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x47,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
# INTEL: vfnmsub231ph zmm30, zmm29, word ptr [r9]{1to32}
|
||||
0x62,0x46,0x15,0x50,0xbe,0x31
|
||||
|
||||
# ATT: vfnmsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
# INTEL: vfnmsub231ph zmm30, zmm29, zmmword ptr [rcx + 8128]
|
||||
0x62,0x66,0x15,0x40,0xbe,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
# INTEL: vfnmsub231ph zmm30 {k7} {z}, zmm29, word ptr [rdx - 256]{1to32}
|
||||
0x62,0x66,0x15,0xd7,0xbe,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub231sh %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmsub231sh xmm30, xmm29, xmm28
|
||||
0x62,0x06,0x15,0x00,0xbf,0xf4
|
||||
|
||||
# ATT: vfnmsub231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
# INTEL: vfnmsub231sh xmm30, xmm29, xmm28, {rn-sae}
|
||||
0x62,0x06,0x15,0x10,0xbf,0xf4
|
||||
|
||||
# ATT: vfnmsub231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
# INTEL: vfnmsub231sh xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
|
||||
0x62,0x26,0x15,0x07,0xbf,0xb4,0xf5,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub231sh (%r9), %xmm29, %xmm30
|
||||
# INTEL: vfnmsub231sh xmm30, xmm29, word ptr [r9]
|
||||
0x62,0x46,0x15,0x00,0xbf,0x31
|
||||
|
||||
# ATT: vfnmsub231sh 254(%rcx), %xmm29, %xmm30
|
||||
# INTEL: vfnmsub231sh xmm30, xmm29, word ptr [rcx + 254]
|
||||
0x62,0x66,0x15,0x00,0xbf,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
# INTEL: vfnmsub231sh xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
|
||||
0x62,0x66,0x15,0x87,0xbf,0x72,0x80
|
||||
|
|
|
@ -1492,3 +1492,723 @@
|
|||
# ATT: vsqrtph -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
# INTEL: vsqrtph ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf5,0x7c,0xbf,0x51,0x72,0x80
|
||||
|
||||
# ATT: vfmadd132ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmadd132ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0x98,0xf4
|
||||
|
||||
# ATT: vfmadd132ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmadd132ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0x98,0xf4
|
||||
|
||||
# ATT: vfmadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmadd132ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0x98,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmadd132ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0x98,0x31
|
||||
|
||||
# ATT: vfmadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmadd132ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0x98,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmadd132ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0x98,0x72,0x80
|
||||
|
||||
# ATT: vfmadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmadd132ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0x98,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmadd132ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0x98,0x31
|
||||
|
||||
# ATT: vfmadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmadd132ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0x98,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmadd132ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0x98,0x72,0x80
|
||||
|
||||
# ATT: vfmadd213ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmadd213ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xa8,0xf4
|
||||
|
||||
# ATT: vfmadd213ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmadd213ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xa8,0xf4
|
||||
|
||||
# ATT: vfmadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmadd213ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xa8,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmadd213ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xa8,0x31
|
||||
|
||||
# ATT: vfmadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmadd213ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xa8,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmadd213ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xa8,0x72,0x80
|
||||
|
||||
# ATT: vfmadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmadd213ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xa8,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmadd213ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xa8,0x31
|
||||
|
||||
# ATT: vfmadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmadd213ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xa8,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmadd213ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xa8,0x72,0x80
|
||||
|
||||
# ATT: vfmadd231ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmadd231ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xb8,0xf4
|
||||
|
||||
# ATT: vfmadd231ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmadd231ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xb8,0xf4
|
||||
|
||||
# ATT: vfmadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmadd231ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xb8,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmadd231ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xb8,0x31
|
||||
|
||||
# ATT: vfmadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmadd231ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xb8,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmadd231ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xb8,0x72,0x80
|
||||
|
||||
# ATT: vfmadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmadd231ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xb8,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmadd231ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xb8,0x31
|
||||
|
||||
# ATT: vfmadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmadd231ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xb8,0x71,0x7f
|
||||
|
||||
# ATT: vfmadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmadd231ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xb8,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub132ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub132ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0x96,0xf4
|
||||
|
||||
# ATT: vfmaddsub132ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub132ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0x96,0xf4
|
||||
|
||||
# ATT: vfmaddsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmaddsub132ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0x96,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub132ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0x96,0x31
|
||||
|
||||
# ATT: vfmaddsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub132ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0x96,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmaddsub132ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0x96,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmaddsub132ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0x96,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub132ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0x96,0x31
|
||||
|
||||
# ATT: vfmaddsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub132ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0x96,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmaddsub132ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0x96,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub213ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub213ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xa6,0xf4
|
||||
|
||||
# ATT: vfmaddsub213ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub213ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xa6,0xf4
|
||||
|
||||
# ATT: vfmaddsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmaddsub213ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xa6,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub213ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xa6,0x31
|
||||
|
||||
# ATT: vfmaddsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub213ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xa6,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmaddsub213ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xa6,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmaddsub213ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xa6,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub213ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xa6,0x31
|
||||
|
||||
# ATT: vfmaddsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub213ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xa6,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmaddsub213ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xa6,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub231ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub231ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xb6,0xf4
|
||||
|
||||
# ATT: vfmaddsub231ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub231ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xb6,0xf4
|
||||
|
||||
# ATT: vfmaddsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmaddsub231ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xb6,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub231ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xb6,0x31
|
||||
|
||||
# ATT: vfmaddsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmaddsub231ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xb6,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmaddsub231ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xb6,0x72,0x80
|
||||
|
||||
# ATT: vfmaddsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmaddsub231ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xb6,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmaddsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub231ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xb6,0x31
|
||||
|
||||
# ATT: vfmaddsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmaddsub231ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xb6,0x71,0x7f
|
||||
|
||||
# ATT: vfmaddsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmaddsub231ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xb6,0x72,0x80
|
||||
|
||||
# ATT: vfmsub132ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmsub132ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0x9a,0xf4
|
||||
|
||||
# ATT: vfmsub132ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmsub132ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0x9a,0xf4
|
||||
|
||||
# ATT: vfmsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmsub132ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0x9a,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmsub132ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0x9a,0x31
|
||||
|
||||
# ATT: vfmsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmsub132ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0x9a,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmsub132ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0x9a,0x72,0x80
|
||||
|
||||
# ATT: vfmsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmsub132ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0x9a,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmsub132ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0x9a,0x31
|
||||
|
||||
# ATT: vfmsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmsub132ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0x9a,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmsub132ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0x9a,0x72,0x80
|
||||
|
||||
# ATT: vfmsub213ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmsub213ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xaa,0xf4
|
||||
|
||||
# ATT: vfmsub213ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmsub213ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xaa,0xf4
|
||||
|
||||
# ATT: vfmsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmsub213ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xaa,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmsub213ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xaa,0x31
|
||||
|
||||
# ATT: vfmsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmsub213ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xaa,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmsub213ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xaa,0x72,0x80
|
||||
|
||||
# ATT: vfmsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmsub213ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xaa,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmsub213ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xaa,0x31
|
||||
|
||||
# ATT: vfmsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmsub213ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xaa,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmsub213ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xaa,0x72,0x80
|
||||
|
||||
# ATT: vfmsub231ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmsub231ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xba,0xf4
|
||||
|
||||
# ATT: vfmsub231ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmsub231ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xba,0xf4
|
||||
|
||||
# ATT: vfmsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmsub231ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xba,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmsub231ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xba,0x31
|
||||
|
||||
# ATT: vfmsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmsub231ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xba,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmsub231ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xba,0x72,0x80
|
||||
|
||||
# ATT: vfmsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmsub231ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xba,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmsub231ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xba,0x31
|
||||
|
||||
# ATT: vfmsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmsub231ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xba,0x71,0x7f
|
||||
|
||||
# ATT: vfmsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmsub231ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xba,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd132ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd132ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0x97,0xf4
|
||||
|
||||
# ATT: vfmsubadd132ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd132ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0x97,0xf4
|
||||
|
||||
# ATT: vfmsubadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmsubadd132ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0x97,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd132ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0x97,0x31
|
||||
|
||||
# ATT: vfmsubadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd132ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0x97,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmsubadd132ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0x97,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmsubadd132ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0x97,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd132ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0x97,0x31
|
||||
|
||||
# ATT: vfmsubadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd132ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0x97,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmsubadd132ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0x97,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd213ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd213ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xa7,0xf4
|
||||
|
||||
# ATT: vfmsubadd213ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd213ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xa7,0xf4
|
||||
|
||||
# ATT: vfmsubadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmsubadd213ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xa7,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd213ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xa7,0x31
|
||||
|
||||
# ATT: vfmsubadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd213ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xa7,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmsubadd213ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xa7,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmsubadd213ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xa7,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd213ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xa7,0x31
|
||||
|
||||
# ATT: vfmsubadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd213ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xa7,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmsubadd213ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xa7,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd231ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd231ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xb7,0xf4
|
||||
|
||||
# ATT: vfmsubadd231ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd231ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xb7,0xf4
|
||||
|
||||
# ATT: vfmsubadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfmsubadd231ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xb7,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd231ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xb7,0x31
|
||||
|
||||
# ATT: vfmsubadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfmsubadd231ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xb7,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfmsubadd231ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xb7,0x72,0x80
|
||||
|
||||
# ATT: vfmsubadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfmsubadd231ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xb7,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfmsubadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd231ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xb7,0x31
|
||||
|
||||
# ATT: vfmsubadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfmsubadd231ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xb7,0x71,0x7f
|
||||
|
||||
# ATT: vfmsubadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfmsubadd231ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xb7,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd132ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfnmadd132ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0x9c,0xf4
|
||||
|
||||
# ATT: vfnmadd132ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfnmadd132ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0x9c,0xf4
|
||||
|
||||
# ATT: vfnmadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfnmadd132ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0x9c,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfnmadd132ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0x9c,0x31
|
||||
|
||||
# ATT: vfnmadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfnmadd132ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0x9c,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfnmadd132ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0x9c,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfnmadd132ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0x9c,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfnmadd132ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0x9c,0x31
|
||||
|
||||
# ATT: vfnmadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfnmadd132ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0x9c,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfnmadd132ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0x9c,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd213ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfnmadd213ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xac,0xf4
|
||||
|
||||
# ATT: vfnmadd213ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfnmadd213ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xac,0xf4
|
||||
|
||||
# ATT: vfnmadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfnmadd213ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xac,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfnmadd213ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xac,0x31
|
||||
|
||||
# ATT: vfnmadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfnmadd213ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xac,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfnmadd213ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xac,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfnmadd213ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xac,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfnmadd213ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xac,0x31
|
||||
|
||||
# ATT: vfnmadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfnmadd213ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xac,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfnmadd213ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xac,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd231ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfnmadd231ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xbc,0xf4
|
||||
|
||||
# ATT: vfnmadd231ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfnmadd231ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xbc,0xf4
|
||||
|
||||
# ATT: vfnmadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfnmadd231ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xbc,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfnmadd231ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xbc,0x31
|
||||
|
||||
# ATT: vfnmadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfnmadd231ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xbc,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfnmadd231ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xbc,0x72,0x80
|
||||
|
||||
# ATT: vfnmadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfnmadd231ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xbc,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfnmadd231ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xbc,0x31
|
||||
|
||||
# ATT: vfnmadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfnmadd231ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xbc,0x71,0x7f
|
||||
|
||||
# ATT: vfnmadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfnmadd231ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xbc,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub132ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfnmsub132ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0x9e,0xf4
|
||||
|
||||
# ATT: vfnmsub132ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfnmsub132ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0x9e,0xf4
|
||||
|
||||
# ATT: vfnmsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfnmsub132ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0x9e,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfnmsub132ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0x9e,0x31
|
||||
|
||||
# ATT: vfnmsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfnmsub132ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0x9e,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfnmsub132ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0x9e,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfnmsub132ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0x9e,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfnmsub132ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0x9e,0x31
|
||||
|
||||
# ATT: vfnmsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfnmsub132ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0x9e,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfnmsub132ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0x9e,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub213ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfnmsub213ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xae,0xf4
|
||||
|
||||
# ATT: vfnmsub213ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfnmsub213ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xae,0xf4
|
||||
|
||||
# ATT: vfnmsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfnmsub213ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xae,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfnmsub213ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xae,0x31
|
||||
|
||||
# ATT: vfnmsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfnmsub213ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xae,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfnmsub213ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xae,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfnmsub213ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xae,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfnmsub213ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xae,0x31
|
||||
|
||||
# ATT: vfnmsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfnmsub213ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xae,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfnmsub213ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xae,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub231ph %ymm4, %ymm5, %ymm6
|
||||
# INTEL: vfnmsub231ph ymm6, ymm5, ymm4
|
||||
0x62,0xf6,0x55,0x28,0xbe,0xf4
|
||||
|
||||
# ATT: vfnmsub231ph %xmm4, %xmm5, %xmm6
|
||||
# INTEL: vfnmsub231ph xmm6, xmm5, xmm4
|
||||
0x62,0xf6,0x55,0x08,0xbe,0xf4
|
||||
|
||||
# ATT: vfnmsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
# INTEL: vfnmsub231ph ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x2f,0xbe,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
# INTEL: vfnmsub231ph ymm6, ymm5, word ptr [ecx]{1to16}
|
||||
0x62,0xf6,0x55,0x38,0xbe,0x31
|
||||
|
||||
# ATT: vfnmsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
# INTEL: vfnmsub231ph ymm6, ymm5, ymmword ptr [ecx + 4064]
|
||||
0x62,0xf6,0x55,0x28,0xbe,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
# INTEL: vfnmsub231ph ymm6 {k7} {z}, ymm5, word ptr [edx - 256]{1to16}
|
||||
0x62,0xf6,0x55,0xbf,0xbe,0x72,0x80
|
||||
|
||||
# ATT: vfnmsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
# INTEL: vfnmsub231ph xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
|
||||
0x62,0xf6,0x55,0x0f,0xbe,0xb4,0xf4,0x00,0x00,0x00,0x10
|
||||
|
||||
# ATT: vfnmsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
# INTEL: vfnmsub231ph xmm6, xmm5, word ptr [ecx]{1to8}
|
||||
0x62,0xf6,0x55,0x18,0xbe,0x31
|
||||
|
||||
# ATT: vfnmsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
# INTEL: vfnmsub231ph xmm6, xmm5, xmmword ptr [ecx + 2032]
|
||||
0x62,0xf6,0x55,0x08,0xbe,0x71,0x7f
|
||||
|
||||
# ATT: vfnmsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
# INTEL: vfnmsub231ph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
|
||||
0x62,0xf6,0x55,0x9f,0xbe,0x72,0x80
|
||||
|
|
|
@ -1763,3 +1763,723 @@
|
|||
// CHECK: vsqrtsh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x65,0x16,0x87,0x51,0x72,0x80]
|
||||
vsqrtsh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd132ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0x98,0xf4]
|
||||
vfmadd132ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x98,0xf4]
|
||||
vfmadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0x98,0x31]
|
||||
vfmadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0x98,0x71,0x7f]
|
||||
vfmadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0x98,0x72,0x80]
|
||||
vfmadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd132sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x99,0xf4]
|
||||
vfmadd132sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x99,0xf4]
|
||||
vfmadd132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x99,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfmadd132sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0x99,0x31]
|
||||
vfmadd132sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd132sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x99,0x71,0x7f]
|
||||
vfmadd132sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0x99,0x72,0x80]
|
||||
vfmadd132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd213ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xa8,0xf4]
|
||||
vfmadd213ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xa8,0xf4]
|
||||
vfmadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xa8,0x31]
|
||||
vfmadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xa8,0x71,0x7f]
|
||||
vfmadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xa8,0x72,0x80]
|
||||
vfmadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd213sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xa9,0xf4]
|
||||
vfmadd213sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xa9,0xf4]
|
||||
vfmadd213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xa9,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfmadd213sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0xa9,0x31]
|
||||
vfmadd213sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd213sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xa9,0x71,0x7f]
|
||||
vfmadd213sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0xa9,0x72,0x80]
|
||||
vfmadd213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd231ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xb8,0xf4]
|
||||
vfmadd231ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xb8,0xf4]
|
||||
vfmadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xb8,0x31]
|
||||
vfmadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xb8,0x71,0x7f]
|
||||
vfmadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xb8,0x72,0x80]
|
||||
vfmadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd231sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xb9,0xf4]
|
||||
vfmadd231sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xb9,0xf4]
|
||||
vfmadd231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xb9,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfmadd231sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0xb9,0x31]
|
||||
vfmadd231sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd231sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xb9,0x71,0x7f]
|
||||
vfmadd231sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmadd231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0xb9,0x72,0x80]
|
||||
vfmadd231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub132ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0x96,0xf4]
|
||||
vfmaddsub132ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x96,0xf4]
|
||||
vfmaddsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0x96,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0x96,0x31]
|
||||
vfmaddsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0x96,0x71,0x7f]
|
||||
vfmaddsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0x96,0x72,0x80]
|
||||
vfmaddsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub213ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xa6,0xf4]
|
||||
vfmaddsub213ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xa6,0xf4]
|
||||
vfmaddsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xa6,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xa6,0x31]
|
||||
vfmaddsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xa6,0x71,0x7f]
|
||||
vfmaddsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xa6,0x72,0x80]
|
||||
vfmaddsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub231ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xb6,0xf4]
|
||||
vfmaddsub231ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xb6,0xf4]
|
||||
vfmaddsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xb6,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xb6,0x31]
|
||||
vfmaddsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xb6,0x71,0x7f]
|
||||
vfmaddsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmaddsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xb6,0x72,0x80]
|
||||
vfmaddsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub132ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0x9a,0xf4]
|
||||
vfmsub132ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x9a,0xf4]
|
||||
vfmsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0x9a,0x31]
|
||||
vfmsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0x9a,0x71,0x7f]
|
||||
vfmsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0x9a,0x72,0x80]
|
||||
vfmsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub132sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x9b,0xf4]
|
||||
vfmsub132sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x9b,0xf4]
|
||||
vfmsub132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x9b,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsub132sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0x9b,0x31]
|
||||
vfmsub132sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub132sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x9b,0x71,0x7f]
|
||||
vfmsub132sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0x9b,0x72,0x80]
|
||||
vfmsub132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub213ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xaa,0xf4]
|
||||
vfmsub213ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xaa,0xf4]
|
||||
vfmsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xaa,0x31]
|
||||
vfmsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xaa,0x71,0x7f]
|
||||
vfmsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xaa,0x72,0x80]
|
||||
vfmsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub213sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xab,0xf4]
|
||||
vfmsub213sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xab,0xf4]
|
||||
vfmsub213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xab,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsub213sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0xab,0x31]
|
||||
vfmsub213sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub213sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xab,0x71,0x7f]
|
||||
vfmsub213sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0xab,0x72,0x80]
|
||||
vfmsub213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub231ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xba,0xf4]
|
||||
vfmsub231ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xba,0xf4]
|
||||
vfmsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xba,0x31]
|
||||
vfmsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xba,0x71,0x7f]
|
||||
vfmsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xba,0x72,0x80]
|
||||
vfmsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub231sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xbb,0xf4]
|
||||
vfmsub231sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xbb,0xf4]
|
||||
vfmsub231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xbb,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsub231sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0xbb,0x31]
|
||||
vfmsub231sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub231sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xbb,0x71,0x7f]
|
||||
vfmsub231sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfmsub231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0xbb,0x72,0x80]
|
||||
vfmsub231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd132ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0x97,0xf4]
|
||||
vfmsubadd132ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x97,0xf4]
|
||||
vfmsubadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0x97,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0x97,0x31]
|
||||
vfmsubadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0x97,0x71,0x7f]
|
||||
vfmsubadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0x97,0x72,0x80]
|
||||
vfmsubadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd213ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xa7,0xf4]
|
||||
vfmsubadd213ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xa7,0xf4]
|
||||
vfmsubadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xa7,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xa7,0x31]
|
||||
vfmsubadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xa7,0x71,0x7f]
|
||||
vfmsubadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xa7,0x72,0x80]
|
||||
vfmsubadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd231ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xb7,0xf4]
|
||||
vfmsubadd231ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xb7,0xf4]
|
||||
vfmsubadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xb7,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xb7,0x31]
|
||||
vfmsubadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xb7,0x71,0x7f]
|
||||
vfmsubadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfmsubadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xb7,0x72,0x80]
|
||||
vfmsubadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd132ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0x9c,0xf4]
|
||||
vfnmadd132ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x9c,0xf4]
|
||||
vfnmadd132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0x9c,0x31]
|
||||
vfnmadd132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0x9c,0x71,0x7f]
|
||||
vfnmadd132ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0x9c,0x72,0x80]
|
||||
vfnmadd132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd132sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x9d,0xf4]
|
||||
vfnmadd132sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x9d,0xf4]
|
||||
vfnmadd132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x9d,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmadd132sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0x9d,0x31]
|
||||
vfnmadd132sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd132sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x9d,0x71,0x7f]
|
||||
vfnmadd132sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0x9d,0x72,0x80]
|
||||
vfnmadd132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd213ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xac,0xf4]
|
||||
vfnmadd213ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xac,0xf4]
|
||||
vfnmadd213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xac,0x31]
|
||||
vfnmadd213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xac,0x71,0x7f]
|
||||
vfnmadd213ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xac,0x72,0x80]
|
||||
vfnmadd213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd213sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xad,0xf4]
|
||||
vfnmadd213sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xad,0xf4]
|
||||
vfnmadd213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xad,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmadd213sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0xad,0x31]
|
||||
vfnmadd213sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd213sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xad,0x71,0x7f]
|
||||
vfnmadd213sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0xad,0x72,0x80]
|
||||
vfnmadd213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd231ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xbc,0xf4]
|
||||
vfnmadd231ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xbc,0xf4]
|
||||
vfnmadd231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xbc,0x31]
|
||||
vfnmadd231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xbc,0x71,0x7f]
|
||||
vfnmadd231ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xbc,0x72,0x80]
|
||||
vfnmadd231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd231sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xbd,0xf4]
|
||||
vfnmadd231sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xbd,0xf4]
|
||||
vfnmadd231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xbd,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmadd231sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0xbd,0x31]
|
||||
vfnmadd231sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd231sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xbd,0x71,0x7f]
|
||||
vfnmadd231sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmadd231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0xbd,0x72,0x80]
|
||||
vfnmadd231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub132ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0x9e,0xf4]
|
||||
vfnmsub132ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x9e,0xf4]
|
||||
vfnmsub132ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub132ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0x9e,0x31]
|
||||
vfnmsub132ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0x9e,0x71,0x7f]
|
||||
vfnmsub132ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0x9e,0x72,0x80]
|
||||
vfnmsub132ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub132sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x9f,0xf4]
|
||||
vfnmsub132sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0x9f,0xf4]
|
||||
vfnmsub132sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x9f,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub132sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmsub132sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0x9f,0x31]
|
||||
vfnmsub132sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub132sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x9f,0x71,0x7f]
|
||||
vfnmsub132sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0x9f,0x72,0x80]
|
||||
vfnmsub132sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub213ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xae,0xf4]
|
||||
vfnmsub213ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xae,0xf4]
|
||||
vfnmsub213ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub213ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xae,0x31]
|
||||
vfnmsub213ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xae,0x71,0x7f]
|
||||
vfnmsub213ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xae,0x72,0x80]
|
||||
vfnmsub213ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub213sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xaf,0xf4]
|
||||
vfnmsub213sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xaf,0xf4]
|
||||
vfnmsub213sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xaf,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub213sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmsub213sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0xaf,0x31]
|
||||
vfnmsub213sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub213sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xaf,0x71,0x7f]
|
||||
vfnmsub213sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0xaf,0x72,0x80]
|
||||
vfnmsub213sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub231ph %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x40,0xbe,0xf4]
|
||||
vfnmsub231ph %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xbe,0xf4]
|
||||
vfnmsub231ph {rn-sae}, %zmm28, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x47,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub231ph 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x50,0xbe,0x31]
|
||||
vfnmsub231ph (%r9){1to32}, %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x40,0xbe,0x71,0x7f]
|
||||
vfnmsub231ph 8128(%rcx), %zmm29, %zmm30
|
||||
|
||||
// CHECK: vfnmsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xd7,0xbe,0x72,0x80]
|
||||
vfnmsub231ph -256(%rdx){1to32}, %zmm29, %zmm30 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub231sh %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xbf,0xf4]
|
||||
vfnmsub231sh %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x10,0xbf,0xf4]
|
||||
vfnmsub231sh {rn-sae}, %xmm28, %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xbf,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub231sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
|
||||
|
||||
// CHECK: vfnmsub231sh (%r9), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x00,0xbf,0x31]
|
||||
vfnmsub231sh (%r9), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub231sh 254(%rcx), %xmm29, %xmm30
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xbf,0x71,0x7f]
|
||||
vfnmsub231sh 254(%rcx), %xmm29, %xmm30
|
||||
|
||||
// CHECK: vfnmsub231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x87,0xbf,0x72,0x80]
|
||||
vfnmsub231sh -256(%rdx), %xmm29, %xmm30 {%k7} {z}
|
||||
|
|
|
@ -1491,3 +1491,723 @@
|
|||
// CHECK: vsqrtph -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x51,0x72,0x80]
|
||||
vsqrtph -256(%edx){1to16}, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd132ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x98,0xf4]
|
||||
vfmadd132ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd132ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x98,0xf4]
|
||||
vfmadd132ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0x98,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0x98,0x31]
|
||||
vfmadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x98,0x71,0x7f]
|
||||
vfmadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0x98,0x72,0x80]
|
||||
vfmadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x98,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x98,0x31]
|
||||
vfmadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x98,0x71,0x7f]
|
||||
vfmadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0x98,0x72,0x80]
|
||||
vfmadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd213ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xa8,0xf4]
|
||||
vfmadd213ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd213ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa8,0xf4]
|
||||
vfmadd213ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xa8,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xa8,0x31]
|
||||
vfmadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xa8,0x71,0x7f]
|
||||
vfmadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xa8,0x72,0x80]
|
||||
vfmadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xa8,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xa8,0x31]
|
||||
vfmadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa8,0x71,0x7f]
|
||||
vfmadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xa8,0x72,0x80]
|
||||
vfmadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd231ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xb8,0xf4]
|
||||
vfmadd231ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd231ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb8,0xf4]
|
||||
vfmadd231ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xb8,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xb8,0x31]
|
||||
vfmadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xb8,0x71,0x7f]
|
||||
vfmadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xb8,0x72,0x80]
|
||||
vfmadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xb8,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xb8,0x31]
|
||||
vfmadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb8,0x71,0x7f]
|
||||
vfmadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xb8,0x72,0x80]
|
||||
vfmadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub132ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x96,0xf4]
|
||||
vfmaddsub132ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub132ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x96,0xf4]
|
||||
vfmaddsub132ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0x96,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0x96,0x31]
|
||||
vfmaddsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x96,0x71,0x7f]
|
||||
vfmaddsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0x96,0x72,0x80]
|
||||
vfmaddsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x96,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x96,0x31]
|
||||
vfmaddsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x96,0x71,0x7f]
|
||||
vfmaddsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0x96,0x72,0x80]
|
||||
vfmaddsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub213ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xa6,0xf4]
|
||||
vfmaddsub213ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub213ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa6,0xf4]
|
||||
vfmaddsub213ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xa6,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xa6,0x31]
|
||||
vfmaddsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xa6,0x71,0x7f]
|
||||
vfmaddsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xa6,0x72,0x80]
|
||||
vfmaddsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xa6,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xa6,0x31]
|
||||
vfmaddsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa6,0x71,0x7f]
|
||||
vfmaddsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xa6,0x72,0x80]
|
||||
vfmaddsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub231ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xb6,0xf4]
|
||||
vfmaddsub231ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub231ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb6,0xf4]
|
||||
vfmaddsub231ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xb6,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xb6,0x31]
|
||||
vfmaddsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xb6,0x71,0x7f]
|
||||
vfmaddsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmaddsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xb6,0x72,0x80]
|
||||
vfmaddsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmaddsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xb6,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmaddsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xb6,0x31]
|
||||
vfmaddsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb6,0x71,0x7f]
|
||||
vfmaddsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmaddsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xb6,0x72,0x80]
|
||||
vfmaddsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub132ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x9a,0xf4]
|
||||
vfmsub132ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub132ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9a,0xf4]
|
||||
vfmsub132ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0x9a,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0x9a,0x31]
|
||||
vfmsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x9a,0x71,0x7f]
|
||||
vfmsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0x9a,0x72,0x80]
|
||||
vfmsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x9a,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9a,0x31]
|
||||
vfmsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9a,0x71,0x7f]
|
||||
vfmsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0x9a,0x72,0x80]
|
||||
vfmsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub213ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xaa,0xf4]
|
||||
vfmsub213ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub213ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xaa,0xf4]
|
||||
vfmsub213ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xaa,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xaa,0x31]
|
||||
vfmsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xaa,0x71,0x7f]
|
||||
vfmsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xaa,0x72,0x80]
|
||||
vfmsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xaa,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xaa,0x31]
|
||||
vfmsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xaa,0x71,0x7f]
|
||||
vfmsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xaa,0x72,0x80]
|
||||
vfmsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub231ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xba,0xf4]
|
||||
vfmsub231ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub231ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xba,0xf4]
|
||||
vfmsub231ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xba,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xba,0x31]
|
||||
vfmsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xba,0x71,0x7f]
|
||||
vfmsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xba,0x72,0x80]
|
||||
vfmsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xba,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xba,0x31]
|
||||
vfmsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xba,0x71,0x7f]
|
||||
vfmsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xba,0x72,0x80]
|
||||
vfmsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd132ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x97,0xf4]
|
||||
vfmsubadd132ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd132ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x97,0xf4]
|
||||
vfmsubadd132ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0x97,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0x97,0x31]
|
||||
vfmsubadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x97,0x71,0x7f]
|
||||
vfmsubadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0x97,0x72,0x80]
|
||||
vfmsubadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x97,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x97,0x31]
|
||||
vfmsubadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x97,0x71,0x7f]
|
||||
vfmsubadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0x97,0x72,0x80]
|
||||
vfmsubadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd213ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xa7,0xf4]
|
||||
vfmsubadd213ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd213ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa7,0xf4]
|
||||
vfmsubadd213ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xa7,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xa7,0x31]
|
||||
vfmsubadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xa7,0x71,0x7f]
|
||||
vfmsubadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xa7,0x72,0x80]
|
||||
vfmsubadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xa7,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xa7,0x31]
|
||||
vfmsubadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa7,0x71,0x7f]
|
||||
vfmsubadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xa7,0x72,0x80]
|
||||
vfmsubadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd231ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xb7,0xf4]
|
||||
vfmsubadd231ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd231ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb7,0xf4]
|
||||
vfmsubadd231ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xb7,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xb7,0x31]
|
||||
vfmsubadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xb7,0x71,0x7f]
|
||||
vfmsubadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfmsubadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xb7,0x72,0x80]
|
||||
vfmsubadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfmsubadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xb7,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfmsubadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xb7,0x31]
|
||||
vfmsubadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb7,0x71,0x7f]
|
||||
vfmsubadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfmsubadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xb7,0x72,0x80]
|
||||
vfmsubadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd132ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x9c,0xf4]
|
||||
vfnmadd132ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd132ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9c,0xf4]
|
||||
vfnmadd132ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0x9c,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfnmadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0x9c,0x31]
|
||||
vfnmadd132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x9c,0x71,0x7f]
|
||||
vfnmadd132ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0x9c,0x72,0x80]
|
||||
vfnmadd132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x9c,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfnmadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9c,0x31]
|
||||
vfnmadd132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9c,0x71,0x7f]
|
||||
vfnmadd132ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0x9c,0x72,0x80]
|
||||
vfnmadd132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd213ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xac,0xf4]
|
||||
vfnmadd213ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd213ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xac,0xf4]
|
||||
vfnmadd213ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xac,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfnmadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xac,0x31]
|
||||
vfnmadd213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xac,0x71,0x7f]
|
||||
vfnmadd213ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xac,0x72,0x80]
|
||||
vfnmadd213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xac,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfnmadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xac,0x31]
|
||||
vfnmadd213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xac,0x71,0x7f]
|
||||
vfnmadd213ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xac,0x72,0x80]
|
||||
vfnmadd213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd231ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xbc,0xf4]
|
||||
vfnmadd231ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd231ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbc,0xf4]
|
||||
vfnmadd231ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xbc,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfnmadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xbc,0x31]
|
||||
vfnmadd231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xbc,0x71,0x7f]
|
||||
vfnmadd231ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xbc,0x72,0x80]
|
||||
vfnmadd231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xbc,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfnmadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xbc,0x31]
|
||||
vfnmadd231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbc,0x71,0x7f]
|
||||
vfnmadd231ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xbc,0x72,0x80]
|
||||
vfnmadd231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub132ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x9e,0xf4]
|
||||
vfnmsub132ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub132ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9e,0xf4]
|
||||
vfnmsub132ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0x9e,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub132ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfnmsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0x9e,0x31]
|
||||
vfnmsub132ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0x9e,0x71,0x7f]
|
||||
vfnmsub132ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0x9e,0x72,0x80]
|
||||
vfnmsub132ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x9e,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub132ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfnmsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9e,0x31]
|
||||
vfnmsub132ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9e,0x71,0x7f]
|
||||
vfnmsub132ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0x9e,0x72,0x80]
|
||||
vfnmsub132ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub213ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xae,0xf4]
|
||||
vfnmsub213ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub213ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xae,0xf4]
|
||||
vfnmsub213ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xae,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub213ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfnmsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xae,0x31]
|
||||
vfnmsub213ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xae,0x71,0x7f]
|
||||
vfnmsub213ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xae,0x72,0x80]
|
||||
vfnmsub213ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xae,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub213ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfnmsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xae,0x31]
|
||||
vfnmsub213ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xae,0x71,0x7f]
|
||||
vfnmsub213ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xae,0x72,0x80]
|
||||
vfnmsub213ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub231ph %ymm4, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xbe,0xf4]
|
||||
vfnmsub231ph %ymm4, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub231ph %xmm4, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbe,0xf4]
|
||||
vfnmsub231ph %xmm4, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x2f,0xbe,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub231ph 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
|
||||
|
||||
// CHECK: vfnmsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x38,0xbe,0x31]
|
||||
vfnmsub231ph (%ecx){1to16}, %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x28,0xbe,0x71,0x7f]
|
||||
vfnmsub231ph 4064(%ecx), %ymm5, %ymm6
|
||||
|
||||
// CHECK: vfnmsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xbf,0xbe,0x72,0x80]
|
||||
vfnmsub231ph -256(%edx){1to16}, %ymm5, %ymm6 {%k7} {z}
|
||||
|
||||
// CHECK: vfnmsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xbe,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub231ph 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
|
||||
|
||||
// CHECK: vfnmsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xbe,0x31]
|
||||
vfnmsub231ph (%ecx){1to8}, %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbe,0x71,0x7f]
|
||||
vfnmsub231ph 2032(%ecx), %xmm5, %xmm6
|
||||
|
||||
// CHECK: vfnmsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x9f,0xbe,0x72,0x80]
|
||||
vfnmsub231ph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
|
||||
|
|
|
@ -1635,3 +1635,723 @@
|
|||
// CHECK: vsqrtsh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf5,0x56,0x8f,0x51,0x72,0x80]
|
||||
vsqrtsh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfmadd132ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x98,0xf4]
|
||||
vfmadd132ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmadd132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x98,0xf4]
|
||||
vfmadd132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmadd132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0x98,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmadd132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0x98,0x31]
|
||||
vfmadd132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmadd132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x98,0x71,0x7f]
|
||||
vfmadd132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmadd132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0x98,0x72,0x80]
|
||||
vfmadd132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmadd132sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x99,0xf4]
|
||||
vfmadd132sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfmadd132sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x99,0xf4]
|
||||
vfmadd132sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmadd132sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x99,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd132sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmadd132sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x99,0x31]
|
||||
vfmadd132sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfmadd132sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x99,0x71,0x7f]
|
||||
vfmadd132sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfmadd132sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0x99,0x72,0x80]
|
||||
vfmadd132sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfmadd213ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xa8,0xf4]
|
||||
vfmadd213ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmadd213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xa8,0xf4]
|
||||
vfmadd213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmadd213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xa8,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmadd213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xa8,0x31]
|
||||
vfmadd213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmadd213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xa8,0x71,0x7f]
|
||||
vfmadd213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmadd213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xa8,0x72,0x80]
|
||||
vfmadd213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmadd213sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa9,0xf4]
|
||||
vfmadd213sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfmadd213sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xa9,0xf4]
|
||||
vfmadd213sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmadd213sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xa9,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd213sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmadd213sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa9,0x31]
|
||||
vfmadd213sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfmadd213sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xa9,0x71,0x7f]
|
||||
vfmadd213sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfmadd213sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0xa9,0x72,0x80]
|
||||
vfmadd213sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfmadd231ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xb8,0xf4]
|
||||
vfmadd231ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmadd231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xb8,0xf4]
|
||||
vfmadd231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmadd231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xb8,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmadd231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xb8,0x31]
|
||||
vfmadd231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmadd231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xb8,0x71,0x7f]
|
||||
vfmadd231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmadd231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xb8,0x72,0x80]
|
||||
vfmadd231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmadd231sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb9,0xf4]
|
||||
vfmadd231sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfmadd231sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xb9,0xf4]
|
||||
vfmadd231sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmadd231sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xb9,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmadd231sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmadd231sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb9,0x31]
|
||||
vfmadd231sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfmadd231sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xb9,0x71,0x7f]
|
||||
vfmadd231sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfmadd231sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0xb9,0x72,0x80]
|
||||
vfmadd231sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfmaddsub132ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x96,0xf4]
|
||||
vfmaddsub132ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmaddsub132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x96,0xf4]
|
||||
vfmaddsub132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmaddsub132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0x96,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0x96,0x31]
|
||||
vfmaddsub132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmaddsub132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x96,0x71,0x7f]
|
||||
vfmaddsub132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmaddsub132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0x96,0x72,0x80]
|
||||
vfmaddsub132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmaddsub213ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xa6,0xf4]
|
||||
vfmaddsub213ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmaddsub213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xa6,0xf4]
|
||||
vfmaddsub213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmaddsub213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xa6,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xa6,0x31]
|
||||
vfmaddsub213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmaddsub213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xa6,0x71,0x7f]
|
||||
vfmaddsub213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmaddsub213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xa6,0x72,0x80]
|
||||
vfmaddsub213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmaddsub231ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xb6,0xf4]
|
||||
vfmaddsub231ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmaddsub231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xb6,0xf4]
|
||||
vfmaddsub231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmaddsub231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xb6,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xb6,0x31]
|
||||
vfmaddsub231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmaddsub231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xb6,0x71,0x7f]
|
||||
vfmaddsub231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmaddsub231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xb6,0x72,0x80]
|
||||
vfmaddsub231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmsub132ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x9a,0xf4]
|
||||
vfmsub132ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmsub132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9a,0xf4]
|
||||
vfmsub132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsub132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0x9a,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsub132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0x9a,0x31]
|
||||
vfmsub132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmsub132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x9a,0x71,0x7f]
|
||||
vfmsub132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmsub132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0x9a,0x72,0x80]
|
||||
vfmsub132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmsub132sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9b,0xf4]
|
||||
vfmsub132sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfmsub132sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9b,0xf4]
|
||||
vfmsub132sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsub132sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x9b,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub132sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsub132sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9b,0x31]
|
||||
vfmsub132sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfmsub132sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9b,0x71,0x7f]
|
||||
vfmsub132sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfmsub132sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0x9b,0x72,0x80]
|
||||
vfmsub132sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfmsub213ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xaa,0xf4]
|
||||
vfmsub213ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmsub213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xaa,0xf4]
|
||||
vfmsub213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsub213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xaa,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsub213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xaa,0x31]
|
||||
vfmsub213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmsub213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xaa,0x71,0x7f]
|
||||
vfmsub213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmsub213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xaa,0x72,0x80]
|
||||
vfmsub213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmsub213sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xab,0xf4]
|
||||
vfmsub213sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfmsub213sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xab,0xf4]
|
||||
vfmsub213sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsub213sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xab,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub213sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsub213sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xab,0x31]
|
||||
vfmsub213sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfmsub213sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xab,0x71,0x7f]
|
||||
vfmsub213sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfmsub213sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0xab,0x72,0x80]
|
||||
vfmsub213sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfmsub231ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xba,0xf4]
|
||||
vfmsub231ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmsub231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xba,0xf4]
|
||||
vfmsub231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsub231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xba,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsub231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xba,0x31]
|
||||
vfmsub231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmsub231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xba,0x71,0x7f]
|
||||
vfmsub231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmsub231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xba,0x72,0x80]
|
||||
vfmsub231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmsub231sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbb,0xf4]
|
||||
vfmsub231sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfmsub231sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xbb,0xf4]
|
||||
vfmsub231sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsub231sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xbb,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsub231sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsub231sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbb,0x31]
|
||||
vfmsub231sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfmsub231sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbb,0x71,0x7f]
|
||||
vfmsub231sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfmsub231sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0xbb,0x72,0x80]
|
||||
vfmsub231sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfmsubadd132ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x97,0xf4]
|
||||
vfmsubadd132ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmsubadd132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x97,0xf4]
|
||||
vfmsubadd132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsubadd132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0x97,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0x97,0x31]
|
||||
vfmsubadd132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmsubadd132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x97,0x71,0x7f]
|
||||
vfmsubadd132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmsubadd132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0x97,0x72,0x80]
|
||||
vfmsubadd132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmsubadd213ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xa7,0xf4]
|
||||
vfmsubadd213ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmsubadd213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xa7,0xf4]
|
||||
vfmsubadd213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsubadd213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xa7,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xa7,0x31]
|
||||
vfmsubadd213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmsubadd213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xa7,0x71,0x7f]
|
||||
vfmsubadd213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmsubadd213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xa7,0x72,0x80]
|
||||
vfmsubadd213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfmsubadd231ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xb7,0xf4]
|
||||
vfmsubadd231ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfmsubadd231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xb7,0xf4]
|
||||
vfmsubadd231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfmsubadd231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xb7,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xb7,0x31]
|
||||
vfmsubadd231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfmsubadd231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xb7,0x71,0x7f]
|
||||
vfmsubadd231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfmsubadd231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xb7,0x72,0x80]
|
||||
vfmsubadd231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfnmadd132ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x9c,0xf4]
|
||||
vfnmadd132ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfnmadd132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9c,0xf4]
|
||||
vfnmadd132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmadd132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0x9c,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmadd132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0x9c,0x31]
|
||||
vfnmadd132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfnmadd132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x9c,0x71,0x7f]
|
||||
vfnmadd132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfnmadd132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0x9c,0x72,0x80]
|
||||
vfnmadd132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfnmadd132sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9d,0xf4]
|
||||
vfnmadd132sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfnmadd132sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9d,0xf4]
|
||||
vfnmadd132sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmadd132sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x9d,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd132sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmadd132sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9d,0x31]
|
||||
vfnmadd132sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfnmadd132sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9d,0x71,0x7f]
|
||||
vfnmadd132sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfnmadd132sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0x9d,0x72,0x80]
|
||||
vfnmadd132sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfnmadd213ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xac,0xf4]
|
||||
vfnmadd213ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfnmadd213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xac,0xf4]
|
||||
vfnmadd213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmadd213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xac,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmadd213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xac,0x31]
|
||||
vfnmadd213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfnmadd213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xac,0x71,0x7f]
|
||||
vfnmadd213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfnmadd213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xac,0x72,0x80]
|
||||
vfnmadd213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfnmadd213sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xad,0xf4]
|
||||
vfnmadd213sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfnmadd213sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xad,0xf4]
|
||||
vfnmadd213sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmadd213sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xad,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd213sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmadd213sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xad,0x31]
|
||||
vfnmadd213sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfnmadd213sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xad,0x71,0x7f]
|
||||
vfnmadd213sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfnmadd213sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0xad,0x72,0x80]
|
||||
vfnmadd213sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfnmadd231ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xbc,0xf4]
|
||||
vfnmadd231ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfnmadd231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xbc,0xf4]
|
||||
vfnmadd231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmadd231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xbc,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmadd231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xbc,0x31]
|
||||
vfnmadd231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfnmadd231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xbc,0x71,0x7f]
|
||||
vfnmadd231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfnmadd231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xbc,0x72,0x80]
|
||||
vfnmadd231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfnmadd231sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbd,0xf4]
|
||||
vfnmadd231sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfnmadd231sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xbd,0xf4]
|
||||
vfnmadd231sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmadd231sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xbd,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmadd231sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmadd231sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbd,0x31]
|
||||
vfnmadd231sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfnmadd231sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbd,0x71,0x7f]
|
||||
vfnmadd231sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfnmadd231sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0xbd,0x72,0x80]
|
||||
vfnmadd231sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfnmsub132ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x9e,0xf4]
|
||||
vfnmsub132ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfnmsub132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9e,0xf4]
|
||||
vfnmsub132ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmsub132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0x9e,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub132ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmsub132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0x9e,0x31]
|
||||
vfnmsub132ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfnmsub132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0x9e,0x71,0x7f]
|
||||
vfnmsub132ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfnmsub132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0x9e,0x72,0x80]
|
||||
vfnmsub132ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfnmsub132sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9f,0xf4]
|
||||
vfnmsub132sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfnmsub132sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0x9f,0xf4]
|
||||
vfnmsub132sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmsub132sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0x9f,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub132sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmsub132sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9f,0x31]
|
||||
vfnmsub132sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfnmsub132sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0x9f,0x71,0x7f]
|
||||
vfnmsub132sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfnmsub132sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0x9f,0x72,0x80]
|
||||
vfnmsub132sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfnmsub213ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xae,0xf4]
|
||||
vfnmsub213ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfnmsub213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xae,0xf4]
|
||||
vfnmsub213ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmsub213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xae,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub213ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmsub213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xae,0x31]
|
||||
vfnmsub213ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfnmsub213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xae,0x71,0x7f]
|
||||
vfnmsub213ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfnmsub213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xae,0x72,0x80]
|
||||
vfnmsub213ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfnmsub213sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xaf,0xf4]
|
||||
vfnmsub213sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfnmsub213sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xaf,0xf4]
|
||||
vfnmsub213sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmsub213sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xaf,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub213sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmsub213sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xaf,0x31]
|
||||
vfnmsub213sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfnmsub213sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xaf,0x71,0x7f]
|
||||
vfnmsub213sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfnmsub213sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0xaf,0x72,0x80]
|
||||
vfnmsub213sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
||||
// CHECK: vfnmsub231ph zmm6, zmm5, zmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xbe,0xf4]
|
||||
vfnmsub231ph zmm6, zmm5, zmm4
|
||||
|
||||
// CHECK: vfnmsub231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xbe,0xf4]
|
||||
vfnmsub231ph zmm6, zmm5, zmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmsub231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x4f,0xbe,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub231ph zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmsub231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x58,0xbe,0x31]
|
||||
vfnmsub231ph zmm6, zmm5, word ptr [ecx]{1to32}
|
||||
|
||||
// CHECK: vfnmsub231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x48,0xbe,0x71,0x7f]
|
||||
vfnmsub231ph zmm6, zmm5, zmmword ptr [ecx + 8128]
|
||||
|
||||
// CHECK: vfnmsub231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0xdf,0xbe,0x72,0x80]
|
||||
vfnmsub231ph zmm6 {k7} {z}, zmm5, word ptr [edx - 256]{1to32}
|
||||
|
||||
// CHECK: vfnmsub231sh xmm6, xmm5, xmm4
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbf,0xf4]
|
||||
vfnmsub231sh xmm6, xmm5, xmm4
|
||||
|
||||
// CHECK: vfnmsub231sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x18,0xbf,0xf4]
|
||||
vfnmsub231sh xmm6, xmm5, xmm4, {rn-sae}
|
||||
|
||||
// CHECK: vfnmsub231sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x0f,0xbf,0xb4,0xf4,0x00,0x00,0x00,0x10]
|
||||
vfnmsub231sh xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
|
||||
|
||||
// CHECK: vfnmsub231sh xmm6, xmm5, word ptr [ecx]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbf,0x31]
|
||||
vfnmsub231sh xmm6, xmm5, word ptr [ecx]
|
||||
|
||||
// CHECK: vfnmsub231sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x08,0xbf,0x71,0x7f]
|
||||
vfnmsub231sh xmm6, xmm5, word ptr [ecx + 254]
|
||||
|
||||
// CHECK: vfnmsub231sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
// CHECK: encoding: [0x62,0xf6,0x55,0x8f,0xbf,0x72,0x80]
|
||||
vfnmsub231sh xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
|
||||
|
|
|
@ -1491,3 +1491,723 @@
|
|||
// CHECK: vsqrtph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x51,0x72,0x80]
|
||||
vsqrtph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmadd132ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0x98,0xf4]
|
||||
vfmadd132ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmadd132ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x98,0xf4]
|
||||
vfmadd132ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmadd132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmadd132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0x98,0x31]
|
||||
vfmadd132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmadd132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0x98,0x71,0x7f]
|
||||
vfmadd132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmadd132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0x98,0x72,0x80]
|
||||
vfmadd132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmadd132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmadd132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0x98,0x31]
|
||||
vfmadd132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmadd132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x98,0x71,0x7f]
|
||||
vfmadd132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmadd132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0x98,0x72,0x80]
|
||||
vfmadd132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmadd213ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xa8,0xf4]
|
||||
vfmadd213ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmadd213ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xa8,0xf4]
|
||||
vfmadd213ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmadd213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmadd213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xa8,0x31]
|
||||
vfmadd213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmadd213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xa8,0x71,0x7f]
|
||||
vfmadd213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmadd213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xa8,0x72,0x80]
|
||||
vfmadd213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmadd213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmadd213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xa8,0x31]
|
||||
vfmadd213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmadd213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xa8,0x71,0x7f]
|
||||
vfmadd213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmadd213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xa8,0x72,0x80]
|
||||
vfmadd213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmadd231ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xb8,0xf4]
|
||||
vfmadd231ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmadd231ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xb8,0xf4]
|
||||
vfmadd231ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmadd231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmadd231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xb8,0x31]
|
||||
vfmadd231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmadd231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xb8,0x71,0x7f]
|
||||
vfmadd231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmadd231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xb8,0x72,0x80]
|
||||
vfmadd231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmadd231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmadd231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmadd231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xb8,0x31]
|
||||
vfmadd231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmadd231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xb8,0x71,0x7f]
|
||||
vfmadd231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmadd231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xb8,0x72,0x80]
|
||||
vfmadd231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmaddsub132ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0x96,0xf4]
|
||||
vfmaddsub132ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmaddsub132ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x96,0xf4]
|
||||
vfmaddsub132ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmaddsub132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0x96,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0x96,0x31]
|
||||
vfmaddsub132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmaddsub132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0x96,0x71,0x7f]
|
||||
vfmaddsub132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmaddsub132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0x96,0x72,0x80]
|
||||
vfmaddsub132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmaddsub132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x96,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0x96,0x31]
|
||||
vfmaddsub132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmaddsub132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x96,0x71,0x7f]
|
||||
vfmaddsub132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmaddsub132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0x96,0x72,0x80]
|
||||
vfmaddsub132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmaddsub213ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xa6,0xf4]
|
||||
vfmaddsub213ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmaddsub213ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xa6,0xf4]
|
||||
vfmaddsub213ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmaddsub213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xa6,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xa6,0x31]
|
||||
vfmaddsub213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmaddsub213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xa6,0x71,0x7f]
|
||||
vfmaddsub213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmaddsub213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xa6,0x72,0x80]
|
||||
vfmaddsub213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmaddsub213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xa6,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xa6,0x31]
|
||||
vfmaddsub213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmaddsub213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xa6,0x71,0x7f]
|
||||
vfmaddsub213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmaddsub213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xa6,0x72,0x80]
|
||||
vfmaddsub213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmaddsub231ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xb6,0xf4]
|
||||
vfmaddsub231ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmaddsub231ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xb6,0xf4]
|
||||
vfmaddsub231ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmaddsub231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xb6,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xb6,0x31]
|
||||
vfmaddsub231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmaddsub231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xb6,0x71,0x7f]
|
||||
vfmaddsub231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmaddsub231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xb6,0x72,0x80]
|
||||
vfmaddsub231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmaddsub231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xb6,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmaddsub231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmaddsub231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xb6,0x31]
|
||||
vfmaddsub231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmaddsub231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xb6,0x71,0x7f]
|
||||
vfmaddsub231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmaddsub231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xb6,0x72,0x80]
|
||||
vfmaddsub231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmsub132ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0x9a,0xf4]
|
||||
vfmsub132ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmsub132ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x9a,0xf4]
|
||||
vfmsub132ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmsub132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsub132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0x9a,0x31]
|
||||
vfmsub132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmsub132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0x9a,0x71,0x7f]
|
||||
vfmsub132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmsub132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0x9a,0x72,0x80]
|
||||
vfmsub132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmsub132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsub132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0x9a,0x31]
|
||||
vfmsub132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmsub132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x9a,0x71,0x7f]
|
||||
vfmsub132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmsub132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0x9a,0x72,0x80]
|
||||
vfmsub132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmsub213ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xaa,0xf4]
|
||||
vfmsub213ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmsub213ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xaa,0xf4]
|
||||
vfmsub213ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmsub213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsub213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xaa,0x31]
|
||||
vfmsub213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmsub213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xaa,0x71,0x7f]
|
||||
vfmsub213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmsub213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xaa,0x72,0x80]
|
||||
vfmsub213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmsub213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsub213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xaa,0x31]
|
||||
vfmsub213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmsub213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xaa,0x71,0x7f]
|
||||
vfmsub213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmsub213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xaa,0x72,0x80]
|
||||
vfmsub213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmsub231ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xba,0xf4]
|
||||
vfmsub231ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmsub231ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xba,0xf4]
|
||||
vfmsub231ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmsub231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsub231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xba,0x31]
|
||||
vfmsub231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmsub231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xba,0x71,0x7f]
|
||||
vfmsub231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmsub231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xba,0x72,0x80]
|
||||
vfmsub231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmsub231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsub231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsub231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xba,0x31]
|
||||
vfmsub231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmsub231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xba,0x71,0x7f]
|
||||
vfmsub231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmsub231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xba,0x72,0x80]
|
||||
vfmsub231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmsubadd132ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0x97,0xf4]
|
||||
vfmsubadd132ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmsubadd132ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x97,0xf4]
|
||||
vfmsubadd132ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmsubadd132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0x97,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0x97,0x31]
|
||||
vfmsubadd132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmsubadd132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0x97,0x71,0x7f]
|
||||
vfmsubadd132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmsubadd132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0x97,0x72,0x80]
|
||||
vfmsubadd132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmsubadd132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x97,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0x97,0x31]
|
||||
vfmsubadd132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmsubadd132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x97,0x71,0x7f]
|
||||
vfmsubadd132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmsubadd132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0x97,0x72,0x80]
|
||||
vfmsubadd132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmsubadd213ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xa7,0xf4]
|
||||
vfmsubadd213ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmsubadd213ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xa7,0xf4]
|
||||
vfmsubadd213ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmsubadd213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xa7,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xa7,0x31]
|
||||
vfmsubadd213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmsubadd213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xa7,0x71,0x7f]
|
||||
vfmsubadd213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmsubadd213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xa7,0x72,0x80]
|
||||
vfmsubadd213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmsubadd213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xa7,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xa7,0x31]
|
||||
vfmsubadd213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmsubadd213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xa7,0x71,0x7f]
|
||||
vfmsubadd213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmsubadd213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xa7,0x72,0x80]
|
||||
vfmsubadd213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfmsubadd231ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xb7,0xf4]
|
||||
vfmsubadd231ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfmsubadd231ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xb7,0xf4]
|
||||
vfmsubadd231ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfmsubadd231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xb7,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xb7,0x31]
|
||||
vfmsubadd231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfmsubadd231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xb7,0x71,0x7f]
|
||||
vfmsubadd231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfmsubadd231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xb7,0x72,0x80]
|
||||
vfmsubadd231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfmsubadd231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xb7,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfmsubadd231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfmsubadd231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xb7,0x31]
|
||||
vfmsubadd231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfmsubadd231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xb7,0x71,0x7f]
|
||||
vfmsubadd231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfmsubadd231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xb7,0x72,0x80]
|
||||
vfmsubadd231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfnmadd132ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0x9c,0xf4]
|
||||
vfnmadd132ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfnmadd132ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x9c,0xf4]
|
||||
vfnmadd132ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfnmadd132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmadd132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0x9c,0x31]
|
||||
vfnmadd132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfnmadd132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0x9c,0x71,0x7f]
|
||||
vfnmadd132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfnmadd132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0x9c,0x72,0x80]
|
||||
vfnmadd132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfnmadd132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmadd132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0x9c,0x31]
|
||||
vfnmadd132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfnmadd132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x9c,0x71,0x7f]
|
||||
vfnmadd132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfnmadd132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0x9c,0x72,0x80]
|
||||
vfnmadd132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfnmadd213ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xac,0xf4]
|
||||
vfnmadd213ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfnmadd213ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xac,0xf4]
|
||||
vfnmadd213ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfnmadd213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmadd213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xac,0x31]
|
||||
vfnmadd213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfnmadd213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xac,0x71,0x7f]
|
||||
vfnmadd213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfnmadd213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xac,0x72,0x80]
|
||||
vfnmadd213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfnmadd213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmadd213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xac,0x31]
|
||||
vfnmadd213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfnmadd213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xac,0x71,0x7f]
|
||||
vfnmadd213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfnmadd213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xac,0x72,0x80]
|
||||
vfnmadd213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfnmadd231ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xbc,0xf4]
|
||||
vfnmadd231ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfnmadd231ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xbc,0xf4]
|
||||
vfnmadd231ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfnmadd231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmadd231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xbc,0x31]
|
||||
vfnmadd231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfnmadd231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xbc,0x71,0x7f]
|
||||
vfnmadd231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfnmadd231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xbc,0x72,0x80]
|
||||
vfnmadd231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfnmadd231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmadd231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmadd231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xbc,0x31]
|
||||
vfnmadd231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfnmadd231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xbc,0x71,0x7f]
|
||||
vfnmadd231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfnmadd231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xbc,0x72,0x80]
|
||||
vfnmadd231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfnmsub132ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0x9e,0xf4]
|
||||
vfnmsub132ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfnmsub132ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0x9e,0xf4]
|
||||
vfnmsub132ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfnmsub132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub132ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmsub132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0x9e,0x31]
|
||||
vfnmsub132ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfnmsub132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0x9e,0x71,0x7f]
|
||||
vfnmsub132ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfnmsub132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0x9e,0x72,0x80]
|
||||
vfnmsub132ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfnmsub132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub132ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmsub132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0x9e,0x31]
|
||||
vfnmsub132ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfnmsub132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0x9e,0x71,0x7f]
|
||||
vfnmsub132ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfnmsub132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0x9e,0x72,0x80]
|
||||
vfnmsub132ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfnmsub213ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xae,0xf4]
|
||||
vfnmsub213ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfnmsub213ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xae,0xf4]
|
||||
vfnmsub213ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfnmsub213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub213ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmsub213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xae,0x31]
|
||||
vfnmsub213ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfnmsub213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xae,0x71,0x7f]
|
||||
vfnmsub213ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfnmsub213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xae,0x72,0x80]
|
||||
vfnmsub213ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfnmsub213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub213ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmsub213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xae,0x31]
|
||||
vfnmsub213ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfnmsub213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xae,0x71,0x7f]
|
||||
vfnmsub213ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfnmsub213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xae,0x72,0x80]
|
||||
vfnmsub213ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
||||
// CHECK: vfnmsub231ph ymm30, ymm29, ymm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x20,0xbe,0xf4]
|
||||
vfnmsub231ph ymm30, ymm29, ymm28
|
||||
|
||||
// CHECK: vfnmsub231ph xmm30, xmm29, xmm28
|
||||
// CHECK: encoding: [0x62,0x06,0x15,0x00,0xbe,0xf4]
|
||||
vfnmsub231ph xmm30, xmm29, xmm28
|
||||
|
||||
// CHECK: vfnmsub231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x27,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub231ph ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmsub231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x30,0xbe,0x31]
|
||||
vfnmsub231ph ymm30, ymm29, word ptr [r9]{1to16}
|
||||
|
||||
// CHECK: vfnmsub231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x20,0xbe,0x71,0x7f]
|
||||
vfnmsub231ph ymm30, ymm29, ymmword ptr [rcx + 4064]
|
||||
|
||||
// CHECK: vfnmsub231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0xb7,0xbe,0x72,0x80]
|
||||
vfnmsub231ph ymm30 {k7} {z}, ymm29, word ptr [rdx - 256]{1to16}
|
||||
|
||||
// CHECK: vfnmsub231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
// CHECK: encoding: [0x62,0x26,0x15,0x07,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
|
||||
vfnmsub231ph xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
|
||||
|
||||
// CHECK: vfnmsub231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
// CHECK: encoding: [0x62,0x46,0x15,0x10,0xbe,0x31]
|
||||
vfnmsub231ph xmm30, xmm29, word ptr [r9]{1to8}
|
||||
|
||||
// CHECK: vfnmsub231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x00,0xbe,0x71,0x7f]
|
||||
vfnmsub231ph xmm30, xmm29, xmmword ptr [rcx + 2032]
|
||||
|
||||
// CHECK: vfnmsub231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
// CHECK: encoding: [0x62,0x66,0x15,0x97,0xbe,0x72,0x80]
|
||||
vfnmsub231ph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
|
||||
|
|
Loading…
Reference in New Issue