forked from OSchip/llvm-project
AVX-512: Added SKX instructions and intrinsics:
{add/sub/mul/div/} x {ps/pd} x {128/256} 2. max/min with sae By Asaf Badouh (asaf.badouh@intel.com) llvm-svn: 236971
This commit is contained in:
parent
176fd7c4af
commit
0d7e9364d1
|
@ -3205,39 +3205,111 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
// Arithmetic ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
|
||||
def int_x86_avx512_mask_add_ps_128 : GCCBuiltin<"__builtin_ia32_addps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_add_ps_256 : GCCBuiltin<"__builtin_ia32_addps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_add_pd_128 : GCCBuiltin<"__builtin_ia32_addpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_add_pd_256 : GCCBuiltin<"__builtin_ia32_addpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_sub_ps_128 : GCCBuiltin<"__builtin_ia32_subps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_sub_ps_256 : GCCBuiltin<"__builtin_ia32_subps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_sub_pd_128 : GCCBuiltin<"__builtin_ia32_subpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_sub_pd_256 : GCCBuiltin<"__builtin_ia32_subpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_mul_ps_128 : GCCBuiltin<"__builtin_ia32_mulps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_mul_ps_256 : GCCBuiltin<"__builtin_ia32_mulps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_mul_pd_128 : GCCBuiltin<"__builtin_ia32_mulpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_mul_pd_256 : GCCBuiltin<"__builtin_ia32_mulpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_div_ps_128 : GCCBuiltin<"__builtin_ia32_divps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_div_ps_256 : GCCBuiltin<"__builtin_ia32_divps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_div_pd_128 : GCCBuiltin<"__builtin_ia32_divpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_div_pd_256 : GCCBuiltin<"__builtin_ia32_divpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_ps_128 : GCCBuiltin<"__builtin_ia32_maxps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_pd_128 : GCCBuiltin<"__builtin_ia32_maxpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_ps_128 : GCCBuiltin<"__builtin_ia32_minps128_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
|
||||
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_pd_128 : GCCBuiltin<"__builtin_ia32_minpd128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
|
||||
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
|
|
@ -205,6 +205,8 @@ namespace llvm {
|
|||
FSUB_RND,
|
||||
FMUL_RND,
|
||||
FDIV_RND,
|
||||
FMAX_RND,
|
||||
FMIN_RND,
|
||||
|
||||
// Integer add/sub with unsigned saturation.
|
||||
ADDUS,
|
||||
|
|
|
@ -180,21 +180,20 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
|
|||
list<dag> Pattern,
|
||||
list<dag> MaskingPattern,
|
||||
list<dag> ZeroMaskingPattern,
|
||||
string Round = "",
|
||||
string MaskingConstraint = "",
|
||||
InstrItinClass itin = NoItinerary,
|
||||
bit IsCommutable = 0> {
|
||||
let isCommutable = IsCommutable in
|
||||
def NAME: AVX512<O, F, Outs, Ins,
|
||||
OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"#
|
||||
"$dst "#Round#", "#IntelSrcAsm#"}",
|
||||
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
|
||||
"$dst , "#IntelSrcAsm#"}",
|
||||
Pattern, itin>;
|
||||
|
||||
// Prefer over VMOV*rrk Pat<>
|
||||
let AddedComplexity = 20 in
|
||||
def NAME#k: AVX512<O, F, Outs, MaskingIns,
|
||||
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}"#Round#"|"#
|
||||
"$dst {${mask}}"#Round#", "#IntelSrcAsm#"}",
|
||||
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
|
||||
"$dst {${mask}}, "#IntelSrcAsm#"}",
|
||||
MaskingPattern, itin>,
|
||||
EVEX_K {
|
||||
// In case of the 3src subclass this is overridden with a let.
|
||||
|
@ -202,8 +201,8 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
|
|||
}
|
||||
let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
|
||||
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
|
||||
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}"#Round#"|"#
|
||||
"$dst {${mask}} {z}"#Round#", "#IntelSrcAsm#"}",
|
||||
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
|
||||
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
|
||||
ZeroMaskingPattern,
|
||||
itin>,
|
||||
EVEX_KZ;
|
||||
|
@ -217,7 +216,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
|
|||
string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS, dag MaskingRHS,
|
||||
SDNode Select = vselect, string Round = "",
|
||||
SDNode Select = vselect,
|
||||
string MaskingConstraint = "",
|
||||
InstrItinClass itin = NoItinerary,
|
||||
bit IsCommutable = 0> :
|
||||
|
@ -227,7 +226,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
|
|||
[(set _.RC:$dst, MaskingRHS)],
|
||||
[(set _.RC:$dst,
|
||||
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
|
||||
Round, MaskingConstraint, NoItinerary, IsCommutable>;
|
||||
MaskingConstraint, NoItinerary, IsCommutable>;
|
||||
|
||||
// This multiclass generates the unconditional/non-masking, the masking and
|
||||
// the zero-masking variant of the vector instruction. In the masking case, the
|
||||
|
@ -235,7 +234,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
|
|||
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
|
||||
dag Outs, dag Ins, string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS, string Round = "",
|
||||
dag RHS,
|
||||
InstrItinClass itin = NoItinerary,
|
||||
bit IsCommutable = 0> :
|
||||
AVX512_maskable_common<O, F, _, Outs, Ins,
|
||||
|
@ -243,14 +242,14 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
|
|||
!con((ins _.KRCWM:$mask), Ins),
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
|
||||
(vselect _.KRCWM:$mask, RHS, _.RC:$src0), vselect,
|
||||
Round, "$src0 = $dst", itin, IsCommutable>;
|
||||
"$src0 = $dst", itin, IsCommutable>;
|
||||
|
||||
// This multiclass generates the unconditional/non-masking, the masking and
|
||||
// the zero-masking variant of the scalar instruction.
|
||||
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
|
||||
dag Outs, dag Ins, string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS, string Round = "",
|
||||
dag RHS,
|
||||
InstrItinClass itin = NoItinerary,
|
||||
bit IsCommutable = 0> :
|
||||
AVX512_maskable_common<O, F, _, Outs, Ins,
|
||||
|
@ -258,7 +257,7 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
|
|||
!con((ins _.KRCWM:$mask), Ins),
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
|
||||
(X86select _.KRCWM:$mask, RHS, _.RC:$src0), X86select,
|
||||
Round, "$src0 = $dst", itin, IsCommutable>;
|
||||
"$src0 = $dst", itin, IsCommutable>;
|
||||
|
||||
// Similar to AVX512_maskable but in this case one of the source operands
|
||||
// ($src1) is already tied to $dst so we just use that for the preserved
|
||||
|
@ -284,7 +283,7 @@ multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
|
|||
AVX512_maskable_custom<O, F, Outs, Ins,
|
||||
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
|
||||
!con((ins _.KRCWM:$mask), Ins),
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], "",
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
|
||||
"$src0 = $dst">;
|
||||
|
||||
|
||||
|
@ -2963,7 +2962,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
|
||||
"", itins.rr, IsCommutable>,
|
||||
itins.rr, IsCommutable>,
|
||||
AVX512BIBase, EVEX_4V;
|
||||
|
||||
let mayLoad = 1 in
|
||||
|
@ -2972,7 +2971,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1,
|
||||
(bitconvert (_.LdFrag addr:$src2)))),
|
||||
"", itins.rm>,
|
||||
itins.rm>,
|
||||
AVX512BIBase, EVEX_4V;
|
||||
}
|
||||
|
||||
|
@ -2988,7 +2987,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(_.VT (OpNode _.RC:$src1,
|
||||
(X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src2)))),
|
||||
"", itins.rm>,
|
||||
itins.rm>,
|
||||
AVX512BIBase, EVEX_4V, EVEX_B;
|
||||
}
|
||||
|
||||
|
@ -3090,7 +3089,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
|
|||
(_Dst.VT (OpNode
|
||||
(_Src.VT _Src.RC:$src1),
|
||||
(_Src.VT _Src.RC:$src2))),
|
||||
"",itins.rr, IsCommutable>,
|
||||
itins.rr, IsCommutable>,
|
||||
AVX512BIBase, EVEX_4V;
|
||||
let mayLoad = 1 in {
|
||||
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
|
||||
|
@ -3098,7 +3097,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
|
|||
"$src2, $src1", "$src1, $src2",
|
||||
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
|
||||
(bitconvert (_Src.LdFrag addr:$src2)))),
|
||||
"", itins.rm>,
|
||||
itins.rm>,
|
||||
AVX512BIBase, EVEX_4V;
|
||||
|
||||
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
|
||||
|
@ -3109,7 +3108,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
|
|||
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
|
||||
(_Dst.VT (X86VBroadcast
|
||||
(_Dst.ScalarLdFrag addr:$src2)))))),
|
||||
"", itins.rm>,
|
||||
itins.rm>,
|
||||
AVX512BIBase, EVEX_4V, EVEX_B;
|
||||
}
|
||||
}
|
||||
|
@ -3165,8 +3164,7 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
"$src1, ${src2}"##_Src.BroadcastStr,
|
||||
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
|
||||
(_Src.VT (X86VBroadcast
|
||||
(_Src.ScalarLdFrag addr:$src2)))))),
|
||||
"">,
|
||||
(_Src.ScalarLdFrag addr:$src2))))))>,
|
||||
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
@ -3179,15 +3177,15 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
|
|||
"$src2, $src1","$src1, $src2",
|
||||
(_Dst.VT (OpNode
|
||||
(_Src.VT _Src.RC:$src1),
|
||||
(_Src.VT _Src.RC:$src2))),
|
||||
"">, EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
|
||||
(_Src.VT _Src.RC:$src2)))>,
|
||||
EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
|
||||
let mayLoad = 1 in {
|
||||
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
|
||||
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
|
||||
(bitconvert (_Src.LdFrag addr:$src2)))),
|
||||
"">, EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
|
||||
(bitconvert (_Src.LdFrag addr:$src2))))>,
|
||||
EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3390,7 +3388,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
|||
"$src2, $src1", "$src1, $src2",
|
||||
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 FROUND_CURRENT)),
|
||||
"", itins.rr, IsCommutable>;
|
||||
itins.rr, IsCommutable>;
|
||||
|
||||
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
|
||||
|
@ -3398,7 +3396,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
|||
(VecNode (_.VT _.RC:$src1),
|
||||
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
|
||||
(i32 FROUND_CURRENT)),
|
||||
"", itins.rm, IsCommutable>;
|
||||
itins.rm, IsCommutable>;
|
||||
let isCodeGenOnly = 1, isCommutable = IsCommutable,
|
||||
Predicates = [HasAVX512] in {
|
||||
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
|
@ -3421,7 +3419,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
|
|||
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
|
||||
"$rc, $src2, $src1", "$src1, $src2, $rc",
|
||||
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 imm:$rc)), "", itins.rr, IsCommutable>,
|
||||
(i32 imm:$rc)), itins.rr, IsCommutable>,
|
||||
EVEX_B, EVEX_RC;
|
||||
}
|
||||
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
|
@ -3429,9 +3427,9 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
|||
|
||||
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
|
||||
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
|
||||
(i32 FROUND_NO_EXC))>, EVEX_B;
|
||||
}
|
||||
|
||||
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -3500,6 +3498,16 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn
|
|||
EVEX_4V, EVEX_B, EVEX_RC;
|
||||
}
|
||||
|
||||
|
||||
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
|
||||
X86VectorVTInfo _, bit IsCommutable> {
|
||||
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
|
||||
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
|
||||
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
|
||||
EVEX_4V, EVEX_B;
|
||||
}
|
||||
|
||||
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
bit IsCommutable = 0> {
|
||||
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
|
||||
|
@ -3533,6 +3541,13 @@ multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR
|
|||
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
|
||||
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
|
||||
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
|
||||
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>,
|
||||
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
|
||||
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>,
|
||||
|
@ -3541,33 +3556,17 @@ defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>,
|
|||
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
|
||||
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>,
|
||||
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
|
||||
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>;
|
||||
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>;
|
||||
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>,
|
||||
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>;
|
||||
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>,
|
||||
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>;
|
||||
let Predicates = [HasDQI] in {
|
||||
defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, 1>;
|
||||
defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, 0>;
|
||||
defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, 1>;
|
||||
defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
|
||||
}
|
||||
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
|
||||
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
|
||||
(i16 -1), FROUND_CURRENT)),
|
||||
(VMAXPSZrr VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
|
||||
(v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
|
||||
(i8 -1), FROUND_CURRENT)),
|
||||
(VMAXPDZrr VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
|
||||
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
|
||||
(i16 -1), FROUND_CURRENT)),
|
||||
(VMINPSZrr VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
|
||||
(v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
|
||||
(i8 -1), FROUND_CURRENT)),
|
||||
(VMINPDZrr VR512:$src1, VR512:$src2)>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 VPTESTM instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -3667,14 +3666,14 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
|
|||
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
|
||||
SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
|
||||
let mayLoad = 1 in
|
||||
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
|
||||
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
|
||||
(i8 imm:$src2))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
|
||||
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
|
||||
}
|
||||
|
||||
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
|
||||
|
@ -3684,7 +3683,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
|
|||
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
|
||||
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
|
||||
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B;
|
||||
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B;
|
||||
}
|
||||
|
||||
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -3694,12 +3693,12 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
|
||||
SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
|
||||
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
|
||||
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
|
||||
EVEX_4V;
|
||||
}
|
||||
|
||||
|
@ -3798,13 +3797,13 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
|
||||
SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
|
||||
let mayLoad = 1 in
|
||||
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
|
||||
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
|
||||
EVEX_CD8<_.EltSize, CD8VF>;
|
||||
}
|
||||
|
||||
|
@ -3817,7 +3816,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
"$src1, ${src2}"##_.BroadcastStr,
|
||||
(_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src2))))),
|
||||
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
|
||||
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
|
||||
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
}
|
||||
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
@ -4775,9 +4774,9 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
|||
|
||||
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
|
||||
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
|
||||
(i32 FROUND_NO_EXC))>, EVEX_B;
|
||||
|
||||
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
|
||||
|
@ -4809,9 +4808,8 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
|||
|
||||
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src), OpcodeStr,
|
||||
"$src", "$src",
|
||||
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
|
||||
"{sae}">, EVEX_B;
|
||||
"{sae}, $src", "$src, {sae}",
|
||||
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
|
||||
|
||||
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
|
||||
|
@ -5051,9 +5049,9 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
|||
|
||||
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
|
||||
"$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
"{sae}, $src3, $src2, $src1", "$src1, $src2, $src3, {sae}",
|
||||
(_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 imm:$src3), (i32 FROUND_NO_EXC))), "{sae}">, EVEX_B;
|
||||
(i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
|
||||
|
||||
let mayLoad = 1 in
|
||||
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
|
|
|
@ -293,8 +293,8 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
|
|||
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
|
||||
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
|
||||
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
|
||||
def X86fmaxRnd : SDNode<"X86ISD::FMAX", SDTFPBinOpRound>;
|
||||
def X86fminRnd : SDNode<"X86ISD::FMIN", SDTFPBinOpRound>;
|
||||
def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
|
||||
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
|
||||
|
||||
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
|
||||
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
|
||||
|
|
|
@ -243,8 +243,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
|
||||
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
|
||||
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
|
||||
X86ISD::FADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_ps_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_ps_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
|
||||
X86ISD::FADD_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_and_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
|
||||
|
@ -322,8 +326,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::COMPRESS, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_pd_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_pd_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
|
||||
X86ISD::FDIV_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_ps_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_ps_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
|
||||
X86ISD::FDIV_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
|
||||
|
@ -350,9 +358,28 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86ISD::EXPAND, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
|
||||
X86ISD::EXPAND, 0),
|
||||
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
|
||||
X86ISD::FMAX_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
|
||||
X86ISD::FMAX_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
|
||||
X86ISD::FMIN_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
|
||||
X86ISD::FMIN_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
|
||||
X86ISD::FMUL_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_ps_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_ps_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
|
||||
X86ISD::FMUL_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_or_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
|
||||
|
@ -512,8 +539,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86ISD::RNDSCALE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::RNDSCALE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
|
||||
X86ISD::FSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_ps_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_ps_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
|
||||
X86ISD::FSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
|
||||
|
|
|
@ -434,15 +434,6 @@ declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) no
|
|||
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
|
||||
|
||||
; fp min - max
|
||||
define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK: vmaxps
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>,
|
||||
<16 x float>, i16, i32)
|
||||
|
||||
define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
|
||||
; CHECK: vmaxpd
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
|
||||
|
@ -452,15 +443,6 @@ define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
|
|||
declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
|
||||
<8 x double>, i8, i32)
|
||||
|
||||
define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK: vminps
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1,
|
||||
<16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>,
|
||||
<16 x float>, i16, i32)
|
||||
|
||||
define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
|
||||
; CHECK: vminpd
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
|
||||
|
@ -2240,3 +2222,362 @@ define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i
|
|||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
|
||||
;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
|
||||
;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
|
||||
;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
|
||||
;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
|
||||
;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
|
||||
;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
|
||||
;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
|
||||
;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
|
||||
;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_add_round_ps_current
|
||||
;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
|
||||
;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
|
||||
;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
|
||||
;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
|
||||
;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_add_round_ps_current
|
||||
;CHECK: vaddps %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
|
||||
;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
|
||||
;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
|
||||
;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
|
||||
;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
|
||||
;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
|
||||
;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
|
||||
;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
|
||||
;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
|
||||
;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_sub_round_ps_current
|
||||
;CHECK: vsubps %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
|
||||
;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
|
||||
;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
|
||||
;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
|
||||
;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
|
||||
;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
|
||||
;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
|
||||
;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
|
||||
;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
|
||||
;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_div_round_ps_current
|
||||
;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
|
||||
;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
|
||||
;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
|
||||
;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
|
||||
;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_div_round_ps_current
|
||||
;CHECK: vdivps %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
||||
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
|
||||
;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
|
||||
;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
|
||||
;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_min_round_ps_current
|
||||
;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_min_round_ps_sae
|
||||
;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_min_round_ps_current
|
||||
;CHECK: vminps %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
||||
define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
|
||||
;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
|
||||
;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
|
||||
;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_max_round_ps_current
|
||||
;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_max_round_ps_sae
|
||||
;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
|
||||
;CHECK-LABEL: test_mm512_max_round_ps_current
|
||||
;CHECK: vmaxps %zmm1, %zmm0, %zmm0
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=knl --show-mc-encoding| FileCheck %s
|
||||
|
||||
define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
|
||||
; CHECK: vrsqrt28ps %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
|
||||
; CHECK: vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
|
||||
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
|
||||
; CHECK: kmovw
|
||||
; CHECK: vrsqrt28ps %zmm0, %zmm1 {%k1}{sae} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
|
||||
; CHECK: vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
|
||||
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
|
|||
}
|
||||
|
||||
define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
|
||||
; CHECK: vrsqrt28ps %zmm0, %zmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
|
||||
; CHECK: vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
|
||||
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
@ -36,61 +36,61 @@ define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
|
|||
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vrcp28ps %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
|
||||
; CHECK: vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
|
||||
%res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
|
||||
; CHECK: vrcp28pd %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
|
||||
; CHECK: vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
|
||||
%res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vexp2ps %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
|
||||
; CHECK: vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
|
||||
%res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
|
||||
; CHECK: vexp2pd %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
|
||||
; CHECK: vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
|
||||
%res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
|
||||
; CHECK: vrsqrt28ss %xmm0, %xmm0, %xmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
|
||||
; CHECK: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
|
||||
; CHECK: vrcp28ss %xmm0, %xmm0, %xmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
|
||||
; CHECK: vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
|
||||
%res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0) {
|
||||
; CHECK: vrsqrt28ss %xmm0, %xmm0, %xmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
|
||||
; CHECK: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 7, i32 8) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0) {
|
||||
; CHECK: vrsqrt28ss %xmm1, %xmm0, %xmm2 {%k1}{sae} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
|
||||
; CHECK: vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 7, i32 8) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0) {
|
||||
; CHECK: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
|
||||
; CHECK: vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
|
||||
%res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 7, i32 8) ;
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
|
|
@ -2289,3 +2289,267 @@ define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
|
|||
ret i8 %res
|
||||
}
|
||||
declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)
|
||||
|
||||
define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_add_ps_256
|
||||
;CHECK: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_add_ps_256
|
||||
;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_add_ps_256
|
||||
;CHECK: vaddps %ymm1, %ymm0, %ymm0
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_add_ps_128
|
||||
;CHECK: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_add_ps_128
|
||||
;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_add_ps_128
|
||||
;CHECK: vaddps %xmm1, %xmm0, %xmm0
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
|
||||
define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_sub_ps_256
|
||||
;CHECK: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_sub_ps_256
|
||||
;CHECK: vsubps %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_sub_ps_256
|
||||
;CHECK: vsubps %ymm1, %ymm0, %ymm0
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_sub_ps_128
|
||||
;CHECK: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_sub_ps_128
|
||||
;CHECK: vsubps %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_sub_ps_128
|
||||
;CHECK: vsubps %xmm1, %xmm0, %xmm0
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
|
||||
define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_mul_ps_256
|
||||
;CHECK: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_mul_ps_256
|
||||
;CHECK: vmulps %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mul_ps_256
|
||||
;CHECK: vmulps %ymm1, %ymm0, %ymm0
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_mul_ps_128
|
||||
;CHECK: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_mul_ps_128
|
||||
;CHECK: vmulps %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mul_ps_128
|
||||
;CHECK: vmulps %xmm1, %xmm0, %xmm0
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
|
||||
define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_div_ps_256
|
||||
;CHECK: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_div_ps_256
|
||||
;CHECK: vdivps %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_div_ps_256
|
||||
;CHECK: vdivps %ymm1, %ymm0, %ymm0
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_div_ps_128
|
||||
;CHECK: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_div_ps_128
|
||||
;CHECK: vdivps %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_div_ps_128
|
||||
;CHECK: vdivps %xmm1, %xmm0, %xmm0
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
|
||||
define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_max_ps_256
|
||||
;CHECK: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_max_ps_256
|
||||
;CHECK: vmaxps %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_max_ps_256
|
||||
;CHECK: vmaxps %ymm1, %ymm0, %ymm0
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_max_ps_128
|
||||
;CHECK: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_max_ps_128
|
||||
;CHECK: vmaxps %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_max_ps_128
|
||||
;CHECK: vmaxps %xmm1, %xmm0, %xmm0
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
||||
|
||||
define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_min_ps_256
|
||||
;CHECK: vminps %ymm1, %ymm0, %ymm0 {%k1} {z}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_min_ps_256
|
||||
;CHECK: vminps %ymm1, %ymm0, %ymm2 {%k1}
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_min_ps_256
|
||||
;CHECK: vminps %ymm1, %ymm0, %ymm0
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
|
||||
|
||||
define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_maskz_min_ps_128
|
||||
;CHECK: vminps %xmm1, %xmm0, %xmm0 {%k1} {z}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_mask_min_ps_128
|
||||
;CHECK: vminps %xmm1, %xmm0, %xmm2 {%k1}
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: test_mm512_min_ps_128
|
||||
;CHECK: vminps %xmm1, %xmm0, %xmm0
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
|
|
@ -6112,3 +6112,586 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
|
|||
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b]
|
||||
vcmpps $0x7b, -516(%rdx){1to16}, %zmm17, %k2
|
||||
|
||||
// CHECK: vaddpd {rn-sae}, %zmm6, %zmm27, %zmm8
|
||||
// CHECK: encoding: [0x62,0x71,0xa5,0x10,0x58,0xc6]
|
||||
vaddpd {rn-sae}, %zmm6, %zmm27, %zmm8
|
||||
|
||||
// CHECK: vaddpd {ru-sae}, %zmm6, %zmm27, %zmm8
|
||||
// CHECK: encoding: [0x62,0x71,0xa5,0x50,0x58,0xc6]
|
||||
vaddpd {ru-sae}, %zmm6, %zmm27, %zmm8
|
||||
|
||||
// CHECK: vaddpd {rd-sae}, %zmm6, %zmm27, %zmm8
|
||||
// CHECK: encoding: [0x62,0x71,0xa5,0x30,0x58,0xc6]
|
||||
vaddpd {rd-sae}, %zmm6, %zmm27, %zmm8
|
||||
|
||||
// CHECK: vaddpd {rz-sae}, %zmm6, %zmm27, %zmm8
|
||||
// CHECK: encoding: [0x62,0x71,0xa5,0x70,0x58,0xc6]
|
||||
vaddpd {rz-sae}, %zmm6, %zmm27, %zmm8
|
||||
|
||||
// CHECK: vaddps {rn-sae}, %zmm2, %zmm13, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x14,0x18,0x58,0xd2]
|
||||
vaddps {rn-sae}, %zmm2, %zmm13, %zmm18
|
||||
|
||||
// CHECK: vaddps {ru-sae}, %zmm2, %zmm13, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x14,0x58,0x58,0xd2]
|
||||
vaddps {ru-sae}, %zmm2, %zmm13, %zmm18
|
||||
|
||||
// CHECK: vaddps {rd-sae}, %zmm2, %zmm13, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x14,0x38,0x58,0xd2]
|
||||
vaddps {rd-sae}, %zmm2, %zmm13, %zmm18
|
||||
|
||||
// CHECK: vaddps {rz-sae}, %zmm2, %zmm13, %zmm18
|
||||
// CHECK: encoding: [0x62,0xe1,0x14,0x78,0x58,0xd2]
|
||||
vaddps {rz-sae}, %zmm2, %zmm13, %zmm18
|
||||
|
||||
// CHECK: vaddsd %xmm8, %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xd1,0xf7,0x00,0x58,0xd8]
|
||||
vaddsd %xmm8, %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd %xmm8, %xmm17, %xmm3 {%k3}
|
||||
// CHECK: encoding: [0x62,0xd1,0xf7,0x03,0x58,0xd8]
|
||||
vaddsd %xmm8, %xmm17, %xmm3 {%k3}
|
||||
|
||||
// CHECK: vaddsd %xmm8, %xmm17, %xmm3 {%k3} {z}
|
||||
// CHECK: encoding: [0x62,0xd1,0xf7,0x83,0x58,0xd8]
|
||||
vaddsd %xmm8, %xmm17, %xmm3 {%k3} {z}
|
||||
|
||||
// CHECK: vaddsd {rn-sae}, %xmm8, %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xd1,0xf7,0x10,0x58,0xd8]
|
||||
vaddsd {rn-sae}, %xmm8, %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd {ru-sae}, %xmm8, %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xd1,0xf7,0x50,0x58,0xd8]
|
||||
vaddsd {ru-sae}, %xmm8, %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd {rd-sae}, %xmm8, %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xd1,0xf7,0x30,0x58,0xd8]
|
||||
vaddsd {rd-sae}, %xmm8, %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd {rz-sae}, %xmm8, %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xd1,0xf7,0x70,0x58,0xd8]
|
||||
vaddsd {rz-sae}, %xmm8, %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd (%rcx), %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xf1,0xf7,0x00,0x58,0x19]
|
||||
vaddsd (%rcx), %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd 291(%rax,%r14,8), %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xb1,0xf7,0x00,0x58,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vaddsd 291(%rax,%r14,8), %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd 1016(%rdx), %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xf1,0xf7,0x00,0x58,0x5a,0x7f]
|
||||
vaddsd 1016(%rdx), %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd 1024(%rdx), %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xf1,0xf7,0x00,0x58,0x9a,0x00,0x04,0x00,0x00]
|
||||
vaddsd 1024(%rdx), %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd -1024(%rdx), %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xf1,0xf7,0x00,0x58,0x5a,0x80]
|
||||
vaddsd -1024(%rdx), %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddsd -1032(%rdx), %xmm17, %xmm3
|
||||
// CHECK: encoding: [0x62,0xf1,0xf7,0x00,0x58,0x9a,0xf8,0xfb,0xff,0xff]
|
||||
vaddsd -1032(%rdx), %xmm17, %xmm3
|
||||
|
||||
// CHECK: vaddss %xmm19, %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xb1,0x56,0x08,0x58,0xfb]
|
||||
vaddss %xmm19, %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss %xmm19, %xmm5, %xmm7 {%k2}
|
||||
// CHECK: encoding: [0x62,0xb1,0x56,0x0a,0x58,0xfb]
|
||||
vaddss %xmm19, %xmm5, %xmm7 {%k2}
|
||||
|
||||
// CHECK: vaddss %xmm19, %xmm5, %xmm7 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0xb1,0x56,0x8a,0x58,0xfb]
|
||||
vaddss %xmm19, %xmm5, %xmm7 {%k2} {z}
|
||||
|
||||
// CHECK: vaddss {rn-sae}, %xmm19, %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xb1,0x56,0x18,0x58,0xfb]
|
||||
vaddss {rn-sae}, %xmm19, %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss {ru-sae}, %xmm19, %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xb1,0x56,0x58,0x58,0xfb]
|
||||
vaddss {ru-sae}, %xmm19, %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss {rd-sae}, %xmm19, %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xb1,0x56,0x38,0x58,0xfb]
|
||||
vaddss {rd-sae}, %xmm19, %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss {rz-sae}, %xmm19, %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xb1,0x56,0x78,0x58,0xfb]
|
||||
vaddss {rz-sae}, %xmm19, %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss (%rcx), %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0x39]
|
||||
vaddss (%rcx), %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss 291(%rax,%r14,8), %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xb1,0x56,0x08,0x58,0xbc,0xf0,0x23,0x01,0x00,0x00]
|
||||
vaddss 291(%rax,%r14,8), %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss 508(%rdx), %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0x7a,0x7f]
|
||||
vaddss 508(%rdx), %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss 512(%rdx), %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0xba,0x00,0x02,0x00,0x00]
|
||||
vaddss 512(%rdx), %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss -512(%rdx), %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0x7a,0x80]
|
||||
vaddss -512(%rdx), %xmm5, %xmm7
|
||||
|
||||
// CHECK: vaddss -516(%rdx), %xmm5, %xmm7
|
||||
// CHECK: encoding: [0x62,0xf1,0x56,0x08,0x58,0xba,0xfc,0xfd,0xff,0xff]
|
||||
vaddss -516(%rdx), %xmm5, %xmm7
|
||||
|
||||
// CHECK: vdivpd {rn-sae}, %zmm11, %zmm6, %zmm18
|
||||
// CHECK: encoding: [0x62,0xc1,0xcd,0x18,0x5e,0xd3]
|
||||
vdivpd {rn-sae}, %zmm11, %zmm6, %zmm18
|
||||
|
||||
// CHECK: vdivpd {ru-sae}, %zmm11, %zmm6, %zmm18
|
||||
// CHECK: encoding: [0x62,0xc1,0xcd,0x58,0x5e,0xd3]
|
||||
vdivpd {ru-sae}, %zmm11, %zmm6, %zmm18
|
||||
|
||||
// CHECK: vdivpd {rd-sae}, %zmm11, %zmm6, %zmm18
|
||||
// CHECK: encoding: [0x62,0xc1,0xcd,0x38,0x5e,0xd3]
|
||||
vdivpd {rd-sae}, %zmm11, %zmm6, %zmm18
|
||||
|
||||
// CHECK: vdivpd {rz-sae}, %zmm11, %zmm6, %zmm18
|
||||
// CHECK: encoding: [0x62,0xc1,0xcd,0x78,0x5e,0xd3]
|
||||
vdivpd {rz-sae}, %zmm11, %zmm6, %zmm18
|
||||
|
||||
// CHECK: vdivps {rn-sae}, %zmm28, %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0x81,0x44,0x10,0x5e,0xfc]
|
||||
vdivps {rn-sae}, %zmm28, %zmm23, %zmm23
|
||||
|
||||
// CHECK: vdivps {ru-sae}, %zmm28, %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0x81,0x44,0x50,0x5e,0xfc]
|
||||
vdivps {ru-sae}, %zmm28, %zmm23, %zmm23
|
||||
|
||||
// CHECK: vdivps {rd-sae}, %zmm28, %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0x81,0x44,0x30,0x5e,0xfc]
|
||||
vdivps {rd-sae}, %zmm28, %zmm23, %zmm23
|
||||
|
||||
// CHECK: vdivps {rz-sae}, %zmm28, %zmm23, %zmm23
|
||||
// CHECK: encoding: [0x62,0x81,0x44,0x70,0x5e,0xfc]
|
||||
vdivps {rz-sae}, %zmm28, %zmm23, %zmm23
|
||||
|
||||
// CHECK: vdivsd %xmm22, %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x21,0x97,0x08,0x5e,0xee]
|
||||
vdivsd %xmm22, %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd %xmm22, %xmm13, %xmm29 {%k3}
|
||||
// CHECK: encoding: [0x62,0x21,0x97,0x0b,0x5e,0xee]
|
||||
vdivsd %xmm22, %xmm13, %xmm29 {%k3}
|
||||
|
||||
// CHECK: vdivsd %xmm22, %xmm13, %xmm29 {%k3} {z}
|
||||
// CHECK: encoding: [0x62,0x21,0x97,0x8b,0x5e,0xee]
|
||||
vdivsd %xmm22, %xmm13, %xmm29 {%k3} {z}
|
||||
|
||||
// CHECK: vdivsd {rn-sae}, %xmm22, %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x21,0x97,0x18,0x5e,0xee]
|
||||
vdivsd {rn-sae}, %xmm22, %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd {ru-sae}, %xmm22, %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x21,0x97,0x58,0x5e,0xee]
|
||||
vdivsd {ru-sae}, %xmm22, %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd {rd-sae}, %xmm22, %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x21,0x97,0x38,0x5e,0xee]
|
||||
vdivsd {rd-sae}, %xmm22, %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd {rz-sae}, %xmm22, %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x21,0x97,0x78,0x5e,0xee]
|
||||
vdivsd {rz-sae}, %xmm22, %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd (%rcx), %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x97,0x08,0x5e,0x29]
|
||||
vdivsd (%rcx), %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd 291(%rax,%r14,8), %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x21,0x97,0x08,0x5e,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vdivsd 291(%rax,%r14,8), %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd 1016(%rdx), %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x97,0x08,0x5e,0x6a,0x7f]
|
||||
vdivsd 1016(%rdx), %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd 1024(%rdx), %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x97,0x08,0x5e,0xaa,0x00,0x04,0x00,0x00]
|
||||
vdivsd 1024(%rdx), %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd -1024(%rdx), %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x97,0x08,0x5e,0x6a,0x80]
|
||||
vdivsd -1024(%rdx), %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivsd -1032(%rdx), %xmm13, %xmm29
|
||||
// CHECK: encoding: [0x62,0x61,0x97,0x08,0x5e,0xaa,0xf8,0xfb,0xff,0xff]
|
||||
vdivsd -1032(%rdx), %xmm13, %xmm29
|
||||
|
||||
// CHECK: vdivss %xmm17, %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x4e,0x08,0x5e,0xe9]
|
||||
vdivss %xmm17, %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss %xmm17, %xmm6, %xmm21 {%k5}
|
||||
// CHECK: encoding: [0x62,0xa1,0x4e,0x0d,0x5e,0xe9]
|
||||
vdivss %xmm17, %xmm6, %xmm21 {%k5}
|
||||
|
||||
// CHECK: vdivss %xmm17, %xmm6, %xmm21 {%k5} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0x4e,0x8d,0x5e,0xe9]
|
||||
vdivss %xmm17, %xmm6, %xmm21 {%k5} {z}
|
||||
|
||||
// CHECK: vdivss {rn-sae}, %xmm17, %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x4e,0x18,0x5e,0xe9]
|
||||
vdivss {rn-sae}, %xmm17, %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss {ru-sae}, %xmm17, %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x4e,0x58,0x5e,0xe9]
|
||||
vdivss {ru-sae}, %xmm17, %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss {rd-sae}, %xmm17, %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x4e,0x38,0x5e,0xe9]
|
||||
vdivss {rd-sae}, %xmm17, %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss {rz-sae}, %xmm17, %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x4e,0x78,0x5e,0xe9]
|
||||
vdivss {rz-sae}, %xmm17, %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss (%rcx), %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4e,0x08,0x5e,0x29]
|
||||
vdivss (%rcx), %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss 291(%rax,%r14,8), %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xa1,0x4e,0x08,0x5e,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vdivss 291(%rax,%r14,8), %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss 508(%rdx), %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4e,0x08,0x5e,0x6a,0x7f]
|
||||
vdivss 508(%rdx), %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss 512(%rdx), %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4e,0x08,0x5e,0xaa,0x00,0x02,0x00,0x00]
|
||||
vdivss 512(%rdx), %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss -512(%rdx), %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4e,0x08,0x5e,0x6a,0x80]
|
||||
vdivss -512(%rdx), %xmm6, %xmm21
|
||||
|
||||
// CHECK: vdivss -516(%rdx), %xmm6, %xmm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x4e,0x08,0x5e,0xaa,0xfc,0xfd,0xff,0xff]
|
||||
vdivss -516(%rdx), %xmm6, %xmm21
|
||||
|
||||
// CHECK: vmaxpd {sae}, %zmm20, %zmm28, %zmm30
|
||||
// CHECK: encoding: [0x62,0x21,0x9d,0x10,0x5f,0xf4]
|
||||
vmaxpd {sae}, %zmm20, %zmm28, %zmm30
|
||||
|
||||
// CHECK: vmaxps {sae}, %zmm20, %zmm6, %zmm25
|
||||
// CHECK: encoding: [0x62,0x21,0x4c,0x18,0x5f,0xcc]
|
||||
vmaxps {sae}, %zmm20, %zmm6, %zmm25
|
||||
|
||||
// CHECK: vmaxsd %xmm25, %xmm19, %xmm20
|
||||
// CHECK: encoding: [0x62,0x81,0xe7,0x00,0x5f,0xe1]
|
||||
vmaxsd %xmm25, %xmm19, %xmm20
|
||||
|
||||
// CHECK: vmaxsd %xmm25, %xmm19, %xmm20 {%k3}
|
||||
// CHECK: encoding: [0x62,0x81,0xe7,0x03,0x5f,0xe1]
|
||||
vmaxsd %xmm25, %xmm19, %xmm20 {%k3}
|
||||
|
||||
// CHECK: vmaxsd %xmm25, %xmm19, %xmm20 {%k3} {z}
|
||||
// CHECK: encoding: [0x62,0x81,0xe7,0x83,0x5f,0xe1]
|
||||
vmaxsd %xmm25, %xmm19, %xmm20 {%k3} {z}
|
||||
|
||||
// CHECK: vmaxsd {sae}, %xmm25, %xmm19, %xmm20
|
||||
// CHECK: encoding: [0x62,0x81,0xe7,0x10,0x5f,0xe1]
|
||||
vmaxsd {sae}, %xmm25, %xmm19, %xmm20
|
||||
|
||||
// CHECK: vmaxsd (%rcx), %xmm19, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0xe7,0x00,0x5f,0x21]
|
||||
vmaxsd (%rcx), %xmm19, %xmm20
|
||||
|
||||
// CHECK: vmaxsd 291(%rax,%r14,8), %xmm19, %xmm20
|
||||
// CHECK: encoding: [0x62,0xa1,0xe7,0x00,0x5f,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmaxsd 291(%rax,%r14,8), %xmm19, %xmm20
|
||||
|
||||
// CHECK: vmaxsd 1016(%rdx), %xmm19, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0xe7,0x00,0x5f,0x62,0x7f]
|
||||
vmaxsd 1016(%rdx), %xmm19, %xmm20
|
||||
|
||||
// CHECK: vmaxsd 1024(%rdx), %xmm19, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0xe7,0x00,0x5f,0xa2,0x00,0x04,0x00,0x00]
|
||||
vmaxsd 1024(%rdx), %xmm19, %xmm20
|
||||
|
||||
// CHECK: vmaxsd -1024(%rdx), %xmm19, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0xe7,0x00,0x5f,0x62,0x80]
|
||||
vmaxsd -1024(%rdx), %xmm19, %xmm20
|
||||
|
||||
// CHECK: vmaxsd -1032(%rdx), %xmm19, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe1,0xe7,0x00,0x5f,0xa2,0xf8,0xfb,0xff,0xff]
|
||||
vmaxsd -1032(%rdx), %xmm19, %xmm20
|
||||
|
||||
// CHECK: vmaxss %xmm6, %xmm4, %xmm8
|
||||
// CHECK: encoding: [0xc5,0x5a,0x5f,0xc6]
|
||||
vmaxss %xmm6, %xmm4, %xmm8
|
||||
|
||||
// CHECK: vmaxss %xmm6, %xmm4, %xmm8 {%k4}
|
||||
// CHECK: encoding: [0x62,0x71,0x5e,0x0c,0x5f,0xc6]
|
||||
vmaxss %xmm6, %xmm4, %xmm8 {%k4}
|
||||
|
||||
// CHECK: vmaxss %xmm6, %xmm4, %xmm8 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0x71,0x5e,0x8c,0x5f,0xc6]
|
||||
vmaxss %xmm6, %xmm4, %xmm8 {%k4} {z}
|
||||
|
||||
// CHECK: vmaxss {sae}, %xmm6, %xmm4, %xmm8
|
||||
// CHECK: encoding: [0x62,0x71,0x5e,0x18,0x5f,0xc6]
|
||||
vmaxss {sae}, %xmm6, %xmm4, %xmm8
|
||||
|
||||
// CHECK: vmaxss (%rcx), %xmm4, %xmm8
|
||||
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x01]
|
||||
vmaxss (%rcx), %xmm4, %xmm8
|
||||
|
||||
// CHECK: vmaxss 291(%rax,%r14,8), %xmm4, %xmm8
|
||||
// CHECK: encoding: [0x62,0x31,0x5e,0x08,0x5f,0x84,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmaxss 291(%rax,%r14,8), %xmm4, %xmm8
|
||||
|
||||
// CHECK: vmaxss 508(%rdx), %xmm4, %xmm8
|
||||
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x42,0x7f]
|
||||
vmaxss 508(%rdx), %xmm4, %xmm8
|
||||
|
||||
// CHECK: vmaxss 512(%rdx), %xmm4, %xmm8
|
||||
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x82,0x00,0x02,0x00,0x00]
|
||||
vmaxss 512(%rdx), %xmm4, %xmm8
|
||||
|
||||
// CHECK: vmaxss -512(%rdx), %xmm4, %xmm8
|
||||
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x42,0x80]
|
||||
vmaxss -512(%rdx), %xmm4, %xmm8
|
||||
|
||||
// CHECK: vmaxss -516(%rdx), %xmm4, %xmm8
|
||||
// CHECK: encoding: [0x62,0x71,0x5e,0x08,0x5f,0x82,0xfc,0xfd,0xff,0xff]
|
||||
vmaxss -516(%rdx), %xmm4, %xmm8
|
||||
|
||||
// CHECK: vminpd {sae}, %zmm22, %zmm6, %zmm6
|
||||
// CHECK: encoding: [0x62,0xb1,0xcd,0x18,0x5d,0xf6]
|
||||
vminpd {sae}, %zmm22, %zmm6, %zmm6
|
||||
|
||||
// CHECK: vminps {sae}, %zmm7, %zmm3, %zmm3
|
||||
// CHECK: encoding: [0x62,0xf1,0x64,0x18,0x5d,0xdf]
|
||||
vminps {sae}, %zmm7, %zmm3, %zmm3
|
||||
|
||||
// CHECK: vminsd %xmm26, %xmm25, %xmm5
|
||||
// CHECK: encoding: [0x62,0x91,0xb7,0x00,0x5d,0xea]
|
||||
vminsd %xmm26, %xmm25, %xmm5
|
||||
|
||||
// CHECK: vminsd %xmm26, %xmm25, %xmm5 {%k3}
|
||||
// CHECK: encoding: [0x62,0x91,0xb7,0x03,0x5d,0xea]
|
||||
vminsd %xmm26, %xmm25, %xmm5 {%k3}
|
||||
|
||||
// CHECK: vminsd %xmm26, %xmm25, %xmm5 {%k3} {z}
|
||||
// CHECK: encoding: [0x62,0x91,0xb7,0x83,0x5d,0xea]
|
||||
vminsd %xmm26, %xmm25, %xmm5 {%k3} {z}
|
||||
|
||||
// CHECK: vminsd {sae}, %xmm26, %xmm25, %xmm5
|
||||
// CHECK: encoding: [0x62,0x91,0xb7,0x10,0x5d,0xea]
|
||||
vminsd {sae}, %xmm26, %xmm25, %xmm5
|
||||
|
||||
// CHECK: vminsd (%rcx), %xmm25, %xmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xb7,0x00,0x5d,0x29]
|
||||
vminsd (%rcx), %xmm25, %xmm5
|
||||
|
||||
// CHECK: vminsd 291(%rax,%r14,8), %xmm25, %xmm5
|
||||
// CHECK: encoding: [0x62,0xb1,0xb7,0x00,0x5d,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vminsd 291(%rax,%r14,8), %xmm25, %xmm5
|
||||
|
||||
// CHECK: vminsd 1016(%rdx), %xmm25, %xmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xb7,0x00,0x5d,0x6a,0x7f]
|
||||
vminsd 1016(%rdx), %xmm25, %xmm5
|
||||
|
||||
// CHECK: vminsd 1024(%rdx), %xmm25, %xmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xb7,0x00,0x5d,0xaa,0x00,0x04,0x00,0x00]
|
||||
vminsd 1024(%rdx), %xmm25, %xmm5
|
||||
|
||||
// CHECK: vminsd -1024(%rdx), %xmm25, %xmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xb7,0x00,0x5d,0x6a,0x80]
|
||||
vminsd -1024(%rdx), %xmm25, %xmm5
|
||||
|
||||
// CHECK: vminsd -1032(%rdx), %xmm25, %xmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xb7,0x00,0x5d,0xaa,0xf8,0xfb,0xff,0xff]
|
||||
vminsd -1032(%rdx), %xmm25, %xmm5
|
||||
|
||||
// CHECK: vminss %xmm19, %xmm17, %xmm10
|
||||
// CHECK: encoding: [0x62,0x31,0x76,0x00,0x5d,0xd3]
|
||||
vminss %xmm19, %xmm17, %xmm10
|
||||
|
||||
// CHECK: vminss %xmm19, %xmm17, %xmm10 {%k5}
|
||||
// CHECK: encoding: [0x62,0x31,0x76,0x05,0x5d,0xd3]
|
||||
vminss %xmm19, %xmm17, %xmm10 {%k5}
|
||||
|
||||
// CHECK: vminss %xmm19, %xmm17, %xmm10 {%k5} {z}
|
||||
// CHECK: encoding: [0x62,0x31,0x76,0x85,0x5d,0xd3]
|
||||
vminss %xmm19, %xmm17, %xmm10 {%k5} {z}
|
||||
|
||||
// CHECK: vminss {sae}, %xmm19, %xmm17, %xmm10
|
||||
// CHECK: encoding: [0x62,0x31,0x76,0x10,0x5d,0xd3]
|
||||
vminss {sae}, %xmm19, %xmm17, %xmm10
|
||||
|
||||
// CHECK: vminss (%rcx), %xmm17, %xmm10
|
||||
// CHECK: encoding: [0x62,0x71,0x76,0x00,0x5d,0x11]
|
||||
vminss (%rcx), %xmm17, %xmm10
|
||||
|
||||
// CHECK: vminss 291(%rax,%r14,8), %xmm17, %xmm10
|
||||
// CHECK: encoding: [0x62,0x31,0x76,0x00,0x5d,0x94,0xf0,0x23,0x01,0x00,0x00]
|
||||
vminss 291(%rax,%r14,8), %xmm17, %xmm10
|
||||
|
||||
// CHECK: vminss 508(%rdx), %xmm17, %xmm10
|
||||
// CHECK: encoding: [0x62,0x71,0x76,0x00,0x5d,0x52,0x7f]
|
||||
vminss 508(%rdx), %xmm17, %xmm10
|
||||
|
||||
// CHECK: vminss 512(%rdx), %xmm17, %xmm10
|
||||
// CHECK: encoding: [0x62,0x71,0x76,0x00,0x5d,0x92,0x00,0x02,0x00,0x00]
|
||||
vminss 512(%rdx), %xmm17, %xmm10
|
||||
|
||||
// CHECK: vminss -512(%rdx), %xmm17, %xmm10
|
||||
// CHECK: encoding: [0x62,0x71,0x76,0x00,0x5d,0x52,0x80]
|
||||
vminss -512(%rdx), %xmm17, %xmm10
|
||||
|
||||
// CHECK: vminss -516(%rdx), %xmm17, %xmm10
|
||||
// CHECK: encoding: [0x62,0x71,0x76,0x00,0x5d,0x92,0xfc,0xfd,0xff,0xff]
|
||||
vminss -516(%rdx), %xmm17, %xmm10
|
||||
|
||||
// CHECK: vmulpd {rn-sae}, %zmm23, %zmm4, %zmm24
|
||||
// CHECK: encoding: [0x62,0x21,0xdd,0x18,0x59,0xc7]
|
||||
vmulpd {rn-sae}, %zmm23, %zmm4, %zmm24
|
||||
|
||||
// CHECK: vmulpd {ru-sae}, %zmm23, %zmm4, %zmm24
|
||||
// CHECK: encoding: [0x62,0x21,0xdd,0x58,0x59,0xc7]
|
||||
vmulpd {ru-sae}, %zmm23, %zmm4, %zmm24
|
||||
|
||||
// CHECK: vmulpd {rd-sae}, %zmm23, %zmm4, %zmm24
|
||||
// CHECK: encoding: [0x62,0x21,0xdd,0x38,0x59,0xc7]
|
||||
vmulpd {rd-sae}, %zmm23, %zmm4, %zmm24
|
||||
|
||||
// CHECK: vmulpd {rz-sae}, %zmm23, %zmm4, %zmm24
|
||||
// CHECK: encoding: [0x62,0x21,0xdd,0x78,0x59,0xc7]
|
||||
vmulpd {rz-sae}, %zmm23, %zmm4, %zmm24
|
||||
|
||||
// CHECK: vmulps {rn-sae}, %zmm24, %zmm6, %zmm3
|
||||
// CHECK: encoding: [0x62,0x91,0x4c,0x18,0x59,0xd8]
|
||||
vmulps {rn-sae}, %zmm24, %zmm6, %zmm3
|
||||
|
||||
// CHECK: vmulps {ru-sae}, %zmm24, %zmm6, %zmm3
|
||||
// CHECK: encoding: [0x62,0x91,0x4c,0x58,0x59,0xd8]
|
||||
vmulps {ru-sae}, %zmm24, %zmm6, %zmm3
|
||||
|
||||
// CHECK: vmulps {rd-sae}, %zmm24, %zmm6, %zmm3
|
||||
// CHECK: encoding: [0x62,0x91,0x4c,0x38,0x59,0xd8]
|
||||
vmulps {rd-sae}, %zmm24, %zmm6, %zmm3
|
||||
|
||||
// CHECK: vmulps {rz-sae}, %zmm24, %zmm6, %zmm3
|
||||
// CHECK: encoding: [0x62,0x91,0x4c,0x78,0x59,0xd8]
|
||||
vmulps {rz-sae}, %zmm24, %zmm6, %zmm3
|
||||
|
||||
// CHECK: vmulsd %xmm18, %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x31,0xdf,0x08,0x59,0xea]
|
||||
vmulsd %xmm18, %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd %xmm18, %xmm4, %xmm13 {%k2}
|
||||
// CHECK: encoding: [0x62,0x31,0xdf,0x0a,0x59,0xea]
|
||||
vmulsd %xmm18, %xmm4, %xmm13 {%k2}
|
||||
|
||||
// CHECK: vmulsd %xmm18, %xmm4, %xmm13 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0x31,0xdf,0x8a,0x59,0xea]
|
||||
vmulsd %xmm18, %xmm4, %xmm13 {%k2} {z}
|
||||
|
||||
// CHECK: vmulsd {rn-sae}, %xmm18, %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x31,0xdf,0x18,0x59,0xea]
|
||||
vmulsd {rn-sae}, %xmm18, %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd {ru-sae}, %xmm18, %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x31,0xdf,0x58,0x59,0xea]
|
||||
vmulsd {ru-sae}, %xmm18, %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd {rd-sae}, %xmm18, %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x31,0xdf,0x38,0x59,0xea]
|
||||
vmulsd {rd-sae}, %xmm18, %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd {rz-sae}, %xmm18, %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x31,0xdf,0x78,0x59,0xea]
|
||||
vmulsd {rz-sae}, %xmm18, %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd (%rcx), %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0x29]
|
||||
vmulsd (%rcx), %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd 291(%rax,%r14,8), %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x31,0xdf,0x08,0x59,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmulsd 291(%rax,%r14,8), %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd 1016(%rdx), %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0x6a,0x7f]
|
||||
vmulsd 1016(%rdx), %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd 1024(%rdx), %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0xaa,0x00,0x04,0x00,0x00]
|
||||
vmulsd 1024(%rdx), %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd -1024(%rdx), %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0x6a,0x80]
|
||||
vmulsd -1024(%rdx), %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulsd -1032(%rdx), %xmm4, %xmm13
|
||||
// CHECK: encoding: [0x62,0x71,0xdf,0x08,0x59,0xaa,0xf8,0xfb,0xff,0xff]
|
||||
vmulsd -1032(%rdx), %xmm4, %xmm13
|
||||
|
||||
// CHECK: vmulss %xmm14, %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xc1,0x2e,0x08,0x59,0xf6]
|
||||
vmulss %xmm14, %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss %xmm14, %xmm10, %xmm22 {%k4}
|
||||
// CHECK: encoding: [0x62,0xc1,0x2e,0x0c,0x59,0xf6]
|
||||
vmulss %xmm14, %xmm10, %xmm22 {%k4}
|
||||
|
||||
// CHECK: vmulss %xmm14, %xmm10, %xmm22 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0xc1,0x2e,0x8c,0x59,0xf6]
|
||||
vmulss %xmm14, %xmm10, %xmm22 {%k4} {z}
|
||||
|
||||
// CHECK: vmulss {rn-sae}, %xmm14, %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xc1,0x2e,0x18,0x59,0xf6]
|
||||
vmulss {rn-sae}, %xmm14, %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss {ru-sae}, %xmm14, %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xc1,0x2e,0x58,0x59,0xf6]
|
||||
vmulss {ru-sae}, %xmm14, %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss {rd-sae}, %xmm14, %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xc1,0x2e,0x38,0x59,0xf6]
|
||||
vmulss {rd-sae}, %xmm14, %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss {rz-sae}, %xmm14, %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xc1,0x2e,0x78,0x59,0xf6]
|
||||
vmulss {rz-sae}, %xmm14, %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss (%rcx), %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x2e,0x08,0x59,0x31]
|
||||
vmulss (%rcx), %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss 291(%rax,%r14,8), %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xa1,0x2e,0x08,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmulss 291(%rax,%r14,8), %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss 508(%rdx), %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x2e,0x08,0x59,0x72,0x7f]
|
||||
vmulss 508(%rdx), %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss 512(%rdx), %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x2e,0x08,0x59,0xb2,0x00,0x02,0x00,0x00]
|
||||
vmulss 512(%rdx), %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss -512(%rdx), %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x2e,0x08,0x59,0x72,0x80]
|
||||
vmulss -512(%rdx), %xmm10, %xmm22
|
||||
|
||||
// CHECK: vmulss -516(%rdx), %xmm10, %xmm22
|
||||
// CHECK: encoding: [0x62,0xe1,0x2e,0x08,0x59,0xb2,0xfc,0xfd,0xff,0xff]
|
||||
vmulss -516(%rdx), %xmm10, %xmm22
|
||||
|
|
Loading…
Reference in New Issue