forked from OSchip/llvm-project
[X86] Remove all of the avx512 masked packed fma intrinsics. Use llvm.fma or unmasked 512-bit intrinsics with rounding mode.
This upgrades all of the intrinsics to use fneg instructions to convert fma into fmsub/fnmsub/fnmadd/fmsubadd, and uses a select instruction for masking. This matches how clang uses the intrinsics these days. llvm-svn: 336409
This commit is contained in:
parent
4ea8949697
commit
7b35585ff1
|
@ -1912,206 +1912,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmadd_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmadd_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmadd_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmadd_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmadd_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmadd_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_vfmadd_pd_512 :
|
def int_x86_avx512_vfmadd_pd_512 :
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
Intrinsic<[llvm_v8f64_ty],
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
|
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmadd_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmadd_pd_512 :
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmadd_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmadd_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmadd_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmadd_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmadd_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmadd_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmadd_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_vfmadd_ps_512 :
|
def int_x86_avx512_vfmadd_ps_512 :
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
Intrinsic<[llvm_v16f32_ty],
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
|
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmadd_ps_512 : // FIXME: remove this intrinsic.
|
// TODO: Can we use 2 vfmadds+shufflevector?
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmadd_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmadd_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmaddsub_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmaddsub_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmaddsub_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmaddsub_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmaddsub_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmaddsub_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_vfmaddsub_pd_512 :
|
def int_x86_avx512_vfmaddsub_pd_512 :
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
Intrinsic<[llvm_v8f64_ty],
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
|
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmaddsub_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmaddsub_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmaddsub_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmaddsub_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmaddsub_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmaddsub_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmaddsub_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmaddsub_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmaddsub_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_vfmaddsub_ps_512 :
|
def int_x86_avx512_vfmaddsub_ps_512 :
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
Intrinsic<[llvm_v16f32_ty],
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
|
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmaddsub_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmaddsub_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_maskz_vfmaddsub_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfmadd_sd :
|
def int_x86_avx512_mask_vfmadd_sd :
|
||||||
GCCBuiltin<"__builtin_ia32_vfmaddsd3_mask">,
|
GCCBuiltin<"__builtin_ia32_vfmaddsd3_mask">,
|
||||||
|
@ -2161,96 +1982,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
|
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsub_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsub_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsub_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsub_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsub_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsub_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsubadd_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsubadd_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsubadd_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsubadd_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsubadd_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfmsubadd_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmadd_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmadd_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmadd_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmadd_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmadd_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmadd_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfnmsub_sd :
|
def int_x86_avx512_mask3_vfnmsub_sd :
|
||||||
GCCBuiltin<"__builtin_ia32_vfnmsubsd3_mask3">,
|
GCCBuiltin<"__builtin_ia32_vfnmsubsd3_mask3">,
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
Intrinsic<[llvm_v2f64_ty],
|
||||||
|
@ -2263,66 +1994,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
|
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmsub_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfnmsub_pd_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v2f64_ty],
|
|
||||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmsub_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfnmsub_pd_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f64_ty],
|
|
||||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmsub_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfnmsub_pd_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f64_ty],
|
|
||||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmsub_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfnmsub_ps_128 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v4f32_ty],
|
|
||||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmsub_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfnmsub_ps_256 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v8f32_ty],
|
|
||||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask_vfnmsub_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_mask3_vfnmsub_ps_512 : // FIXME: remove this intrinsic.
|
|
||||||
Intrinsic<[llvm_v16f32_ty],
|
|
||||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
|
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_x86_avx512_vpmadd52h_uq_128 :
|
def int_x86_avx512_vpmadd52h_uq_128 :
|
||||||
GCCBuiltin<"__builtin_ia32_vpmadd52huq128">,
|
GCCBuiltin<"__builtin_ia32_vpmadd52huq128">,
|
||||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
|
||||||
|
|
|
@ -80,6 +80,17 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
||||||
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
|
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
|
||||||
Name.startswith("fma.vfnmadd.") || // Added in 7.0
|
Name.startswith("fma.vfnmadd.") || // Added in 7.0
|
||||||
Name.startswith("fma.vfnmsub.") || // Added in 7.0
|
Name.startswith("fma.vfnmsub.") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask.vfmadd.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask.vfnmadd.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask.vfnmsub.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask3.vfmadd.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.maskz.vfmadd.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask3.vfmsub.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask3.vfnmsub.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask.vfmaddsub.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.maskz.vfmaddsub.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask3.vfmaddsub.p") || // Added in 7.0
|
||||||
|
Name.startswith("avx512.mask3.vfmsubadd.p") || // Added in 7.0
|
||||||
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
|
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
|
||||||
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
|
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
|
||||||
Name.startswith("avx512.kunpck") || //added in 6.0
|
Name.startswith("avx512.kunpck") || //added in 6.0
|
||||||
|
@ -2779,13 +2790,74 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||||
if (IsScalar)
|
if (IsScalar)
|
||||||
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
|
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
|
||||||
(uint64_t)0);
|
(uint64_t)0);
|
||||||
|
} else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
|
||||||
|
Name.startswith("avx512.mask.vfnmadd.p") ||
|
||||||
|
Name.startswith("avx512.mask.vfnmsub.p") ||
|
||||||
|
Name.startswith("avx512.mask3.vfmadd.p") ||
|
||||||
|
Name.startswith("avx512.mask3.vfmsub.p") ||
|
||||||
|
Name.startswith("avx512.mask3.vfnmsub.p") ||
|
||||||
|
Name.startswith("avx512.maskz.vfmadd.p"))) {
|
||||||
|
bool IsMask3 = Name[11] == '3';
|
||||||
|
bool IsMaskZ = Name[11] == 'z';
|
||||||
|
// Drop the "avx512.mask." to make it easier.
|
||||||
|
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
|
||||||
|
bool NegMul = Name[2] == 'n';
|
||||||
|
bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
|
||||||
|
|
||||||
|
if (CI->getNumArgOperands() == 5 &&
|
||||||
|
(!isa<ConstantInt>(CI->getArgOperand(4)) ||
|
||||||
|
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
|
||||||
|
Intrinsic::ID IID;
|
||||||
|
// Check the character before ".512" in string.
|
||||||
|
if (Name[Name.size()-5] == 's')
|
||||||
|
IID = Intrinsic::x86_avx512_vfmadd_ps_512;
|
||||||
|
else
|
||||||
|
IID = Intrinsic::x86_avx512_vfmadd_pd_512;
|
||||||
|
|
||||||
|
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
|
||||||
|
CI->getArgOperand(2), CI->getArgOperand(4) };
|
||||||
|
|
||||||
|
if (NegMul) {
|
||||||
|
if (IsMaskZ || IsMask3)
|
||||||
|
Ops[0] = Builder.CreateFNeg(Ops[0]);
|
||||||
|
else
|
||||||
|
Ops[1] = Builder.CreateFNeg(Ops[1]);
|
||||||
|
}
|
||||||
|
if (NegAcc)
|
||||||
|
Ops[2] = Builder.CreateFNeg(Ops[2]);
|
||||||
|
|
||||||
|
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||||
|
Ops);
|
||||||
|
} else {
|
||||||
|
|
||||||
|
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
|
||||||
|
CI->getArgOperand(2) };
|
||||||
|
|
||||||
|
if (NegMul) {
|
||||||
|
if (IsMaskZ || IsMask3)
|
||||||
|
Ops[0] = Builder.CreateFNeg(Ops[0]);
|
||||||
|
else
|
||||||
|
Ops[1] = Builder.CreateFNeg(Ops[1]);
|
||||||
|
}
|
||||||
|
if (NegAcc)
|
||||||
|
Ops[2] = Builder.CreateFNeg(Ops[2]);
|
||||||
|
|
||||||
|
Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
|
||||||
|
Intrinsic::fma,
|
||||||
|
Ops[0]->getType());
|
||||||
|
Rep = Builder.CreateCall(FMA, Ops);
|
||||||
|
}
|
||||||
|
|
||||||
|
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
|
||||||
|
IsMask3 ? CI->getArgOperand(2) :
|
||||||
|
CI->getArgOperand(0);
|
||||||
|
|
||||||
|
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
|
||||||
} else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
|
} else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
|
||||||
Name.startswith("fma.vfmsubadd.p"))) {
|
Name.startswith("fma.vfmsubadd.p"))) {
|
||||||
bool IsSubAdd = Name[7] == 's';
|
bool IsSubAdd = Name[7] == 's';
|
||||||
int NumElts = CI->getType()->getVectorNumElements();
|
int NumElts = CI->getType()->getVectorNumElements();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
|
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
|
||||||
CI->getArgOperand(2) };
|
CI->getArgOperand(2) };
|
||||||
|
|
||||||
|
@ -2803,6 +2875,60 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||||
Idxs[i] = i + (i % 2) * NumElts;
|
Idxs[i] = i + (i % 2) * NumElts;
|
||||||
|
|
||||||
Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
|
Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
|
||||||
|
} else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
|
||||||
|
Name.startswith("avx512.mask3.vfmaddsub.p") ||
|
||||||
|
Name.startswith("avx512.maskz.vfmaddsub.p") ||
|
||||||
|
Name.startswith("avx512.mask3.vfmsubadd.p"))) {
|
||||||
|
bool IsMask3 = Name[11] == '3';
|
||||||
|
bool IsMaskZ = Name[11] == 'z';
|
||||||
|
// Drop the "avx512.mask." to make it easier.
|
||||||
|
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
|
||||||
|
bool IsSubAdd = Name[3] == 's';
|
||||||
|
if (CI->getNumArgOperands() == 5 &&
|
||||||
|
(!isa<ConstantInt>(CI->getArgOperand(4)) ||
|
||||||
|
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
|
||||||
|
Intrinsic::ID IID;
|
||||||
|
// Check the character before ".512" in string.
|
||||||
|
if (Name[Name.size()-5] == 's')
|
||||||
|
IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
|
||||||
|
else
|
||||||
|
IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
|
||||||
|
|
||||||
|
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
|
||||||
|
CI->getArgOperand(2), CI->getArgOperand(4) };
|
||||||
|
if (IsSubAdd)
|
||||||
|
Ops[2] = Builder.CreateFNeg(Ops[2]);
|
||||||
|
|
||||||
|
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
|
||||||
|
{CI->getArgOperand(0), CI->getArgOperand(1),
|
||||||
|
CI->getArgOperand(2), CI->getArgOperand(4)});
|
||||||
|
} else {
|
||||||
|
int NumElts = CI->getType()->getVectorNumElements();
|
||||||
|
|
||||||
|
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
|
||||||
|
CI->getArgOperand(2) };
|
||||||
|
|
||||||
|
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
|
||||||
|
Ops[0]->getType());
|
||||||
|
Value *Odd = Builder.CreateCall(FMA, Ops);
|
||||||
|
Ops[2] = Builder.CreateFNeg(Ops[2]);
|
||||||
|
Value *Even = Builder.CreateCall(FMA, Ops);
|
||||||
|
|
||||||
|
if (IsSubAdd)
|
||||||
|
std::swap(Even, Odd);
|
||||||
|
|
||||||
|
SmallVector<uint32_t, 32> Idxs(NumElts);
|
||||||
|
for (int i = 0; i != NumElts; ++i)
|
||||||
|
Idxs[i] = i + (i % 2) * NumElts;
|
||||||
|
|
||||||
|
Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
|
||||||
|
}
|
||||||
|
|
||||||
|
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
|
||||||
|
IsMask3 ? CI->getArgOperand(2) :
|
||||||
|
CI->getArgOperand(0);
|
||||||
|
|
||||||
|
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
|
||||||
} else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
|
} else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
|
||||||
Name.startswith("avx512.maskz.pternlog."))) {
|
Name.startswith("avx512.maskz.pternlog."))) {
|
||||||
bool ZeroMask = Name[11] == 'z';
|
bool ZeroMask = Name[11] == 'z';
|
||||||
|
|
|
@ -20678,7 +20678,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||||
// Swap Src1 and Src2 in the node creation
|
// Swap Src1 and Src2 in the node creation
|
||||||
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
|
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
|
||||||
}
|
}
|
||||||
case FMA_OP_MASK3:
|
|
||||||
case FMA_OP_MASKZ:
|
case FMA_OP_MASKZ:
|
||||||
case FMA_OP_MASK: {
|
case FMA_OP_MASK: {
|
||||||
SDValue Src1 = Op.getOperand(1);
|
SDValue Src1 = Op.getOperand(1);
|
||||||
|
@ -20691,8 +20690,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||||
// set PassThru element
|
// set PassThru element
|
||||||
if (IntrData->Type == FMA_OP_MASKZ)
|
if (IntrData->Type == FMA_OP_MASKZ)
|
||||||
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
|
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
|
||||||
else if (IntrData->Type == FMA_OP_MASK3)
|
|
||||||
PassThru = Src3;
|
|
||||||
else
|
else
|
||||||
PassThru = Src1;
|
PassThru = Src1;
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ enum IntrinsicType : uint16_t {
|
||||||
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
|
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
|
||||||
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
|
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
|
||||||
INTR_TYPE_3OP_MASK,
|
INTR_TYPE_3OP_MASK,
|
||||||
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
|
FMA_OP_MASK, FMA_OP_MASKZ,
|
||||||
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
|
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
|
||||||
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
|
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
|
||||||
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
|
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
|
||||||
|
@ -878,43 +878,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||||
X86ISD::CVTPS2PH, 0),
|
X86ISD::CVTPS2PH, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
|
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
|
||||||
X86ISD::CVTPS2PH, 0),
|
X86ISD::CVTPS2PH, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, ISD::FMA,
|
|
||||||
X86ISD::FMADD_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, ISD::FMA,
|
|
||||||
X86ISD::FMADD_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
|
X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
|
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
|
|
||||||
X86ISD::FMADDSUB_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
|
|
||||||
X86ISD::FMADDSUB_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
|
|
||||||
X86ISD::FNMADD_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
|
|
||||||
X86ISD::FNMADD_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
|
|
||||||
X86ISD::FNMSUB_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
|
|
||||||
X86ISD::FNMSUB_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
|
X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
|
X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
|
||||||
|
@ -942,54 +908,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||||
X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
|
X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
|
||||||
X86ISD::VPSHUFBITQMB, 0),
|
X86ISD::VPSHUFBITQMB, 0),
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, ISD::FMA,
|
|
||||||
X86ISD::FMADD_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, ISD::FMA,
|
|
||||||
X86ISD::FMADD_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
|
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
|
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
|
|
||||||
X86ISD::FMADDSUB_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
|
|
||||||
X86ISD::FMADDSUB_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_512, FMA_OP_MASK3, X86ISD::FMSUB,
|
|
||||||
X86ISD::FMSUB_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
|
|
||||||
X86ISD::FMSUB_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
|
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
|
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
|
|
||||||
X86ISD::FMSUBADD_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
|
|
||||||
X86ISD::FMSUBADD_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_512, FMA_OP_MASK3, X86ISD::FNMSUB,
|
|
||||||
X86ISD::FNMSUB_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
|
|
||||||
X86ISD::FNMSUB_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
|
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
|
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
|
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
|
||||||
|
@ -1008,25 +932,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||||
X86ISD::VFIXUPIMMS, 0),
|
X86ISD::VFIXUPIMMS, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
|
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
|
||||||
X86ISD::VFIXUPIMMS, 0),
|
X86ISD::VFIXUPIMMS, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, ISD::FMA,
|
|
||||||
X86ISD::FMADD_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, ISD::FMA, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, ISD::FMA,
|
|
||||||
X86ISD::FMADD_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
|
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
|
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
|
|
||||||
X86ISD::FMADDSUB_RND),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
|
|
||||||
X86ISD::FMADDSUB_RND),
|
|
||||||
|
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
|
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
|
||||||
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
|
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
|
||||||
|
|
|
@ -11,10 +11,10 @@ define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <
|
||||||
; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xac,0xc2]
|
; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xac,0xc2]
|
||||||
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) + zmm2
|
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) + zmm2
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
|
||||||
ret <16 x float> %res
|
%2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %a2)
|
||||||
|
ret <16 x float> %2
|
||||||
}
|
}
|
||||||
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
|
||||||
|
|
||||||
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||||
; X86-LABEL: test_mask_vfnmadd_ps:
|
; X86-LABEL: test_mask_vfnmadd_ps:
|
||||||
|
@ -30,8 +30,11 @@ define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <1
|
||||||
; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
|
; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) + zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) + zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
|
||||||
ret <16 x float> %res
|
%2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %a2)
|
||||||
|
%3 = bitcast i16 %mask to <16 x i1>
|
||||||
|
%4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %a0
|
||||||
|
ret <16 x float> %4
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||||
|
@ -40,10 +43,10 @@ define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <
|
||||||
; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xac,0xc2]
|
; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xac,0xc2]
|
||||||
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) + zmm2
|
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) + zmm2
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %a2)
|
||||||
|
ret <8 x double> %2
|
||||||
}
|
}
|
||||||
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
|
||||||
|
|
||||||
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||||
; X86-LABEL: test_mask_vfnmadd_pd:
|
; X86-LABEL: test_mask_vfnmadd_pd:
|
||||||
|
@ -60,8 +63,11 @@ define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8
|
||||||
; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
|
; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) + zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) + zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %a2)
|
||||||
|
%3 = bitcast i8 %mask to <8 x i1>
|
||||||
|
%4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %a0
|
||||||
|
ret <8 x double> %4
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||||
|
@ -70,10 +76,11 @@ define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <1
|
||||||
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xae,0xc2]
|
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xae,0xc2]
|
||||||
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
|
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
|
||||||
ret <16 x float> %res
|
%2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||||
|
%3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %2)
|
||||||
|
ret <16 x float> %3
|
||||||
}
|
}
|
||||||
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
|
||||||
|
|
||||||
define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||||
; X86-LABEL: test_mask_vfnmsub_ps:
|
; X86-LABEL: test_mask_vfnmsub_ps:
|
||||||
|
@ -89,8 +96,12 @@ define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <1
|
||||||
; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
|
%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
|
||||||
ret <16 x float> %res
|
%2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||||
|
%3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %2)
|
||||||
|
%4 = bitcast i16 %mask to <16 x i1>
|
||||||
|
%5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %a0
|
||||||
|
ret <16 x float> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||||
|
@ -99,10 +110,11 @@ define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8
|
||||||
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
|
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
|
||||||
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
|
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
|
||||||
|
ret <8 x double> %3
|
||||||
}
|
}
|
||||||
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
|
||||||
|
|
||||||
define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||||
; X86-LABEL: test_mask_vfnmsub_pd:
|
; X86-LABEL: test_mask_vfnmsub_pd:
|
||||||
|
@ -119,8 +131,12 @@ define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8
|
||||||
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
|
||||||
|
%4 = bitcast i8 %mask to <8 x i1>
|
||||||
|
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
|
||||||
|
ret <8 x double> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||||
|
@ -129,7 +145,7 @@ define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1,
|
||||||
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xa6,0xc2]
|
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xa6,0xc2]
|
||||||
; CHECK-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
|
; CHECK-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) nounwind
|
%res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) #2
|
||||||
ret <16 x float> %res
|
ret <16 x float> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,7 +177,7 @@ define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1,
|
||||||
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xa6,0xc2]
|
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xa6,0xc2]
|
||||||
; CHECK-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
|
; CHECK-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) nounwind
|
%res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) #2
|
||||||
ret <8 x double> %res
|
ret <8 x double> %res
|
||||||
}
|
}
|
||||||
declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) nounwind readnone
|
declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) nounwind readnone
|
||||||
|
@ -181,7 +197,7 @@ define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1,
|
||||||
; X64-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
|
; X64-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
|
||||||
; X64-NEXT: # zmm0 = (zmm0 * zmm1) +/- zmm2
|
; X64-NEXT: # zmm0 = (zmm0 * zmm1) +/- zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) nounwind
|
%res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) #2
|
||||||
%bc = bitcast i8 %mask to <8 x i1>
|
%bc = bitcast i8 %mask to <8 x i1>
|
||||||
%sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
|
%sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
|
||||||
ret <8 x double> %sel
|
ret <8 x double> %sel
|
||||||
|
@ -208,8 +224,6 @@ define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0,
|
||||||
ret <8 x double> %sel
|
ret <8 x double> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -227,12 +241,15 @@ define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0,
|
||||||
; X64-NEXT: # zmm2 = (zmm0 * zmm1) +/- zmm2
|
; X64-NEXT: # zmm2 = (zmm0 * zmm1) +/- zmm2
|
||||||
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
|
||||||
|
%4 = shufflevector <8 x double> %3, <8 x double> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||||
|
%5 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %x2
|
||||||
|
ret <8 x double> %6
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
|
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -248,8 +265,13 @@ define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0,
|
||||||
; X64-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
|
; X64-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
|
||||||
; X64-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
|
; X64-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
|
||||||
|
%4 = shufflevector <8 x double> %3, <8 x double> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||||
|
%5 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%6 = select <8 x i1> %5, <8 x double> %4, <8 x double> zeroinitializer
|
||||||
|
ret <8 x double> %6
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
|
@ -272,8 +294,6 @@ define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0,
|
||||||
ret <16 x float> %sel
|
ret <16 x float> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -290,12 +310,15 @@ define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0,
|
||||||
; X64-NEXT: # zmm2 = (zmm0 * zmm1) +/- zmm2
|
; X64-NEXT: # zmm2 = (zmm0 * zmm1) +/- zmm2
|
||||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
|
||||||
ret <16 x float> %res
|
%2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
|
||||||
|
%3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
|
||||||
|
%4 = shufflevector <16 x float> %3, <16 x float> %1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
|
||||||
|
%5 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%6 = select <16 x i1> %5, <16 x float> %4, <16 x float> %x2
|
||||||
|
ret <16 x float> %6
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
|
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -310,12 +333,15 @@ define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0,
|
||||||
; X64-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
|
; X64-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
|
||||||
; X64-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
|
; X64-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
|
||||||
ret <16 x float> %res
|
%2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
|
||||||
|
%3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
|
||||||
|
%4 = shufflevector <16 x float> %3, <16 x float> %1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
|
||||||
|
%5 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%6 = select <16 x i1> %5, <16 x float> %4, <16 x float> zeroinitializer
|
||||||
|
ret <16 x float> %6
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -333,12 +359,15 @@ define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0,
|
||||||
; X64-NEXT: # zmm2 = (zmm0 * zmm1) -/+ zmm2
|
; X64-NEXT: # zmm2 = (zmm0 * zmm1) -/+ zmm2
|
||||||
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
|
||||||
|
%4 = shufflevector <8 x double> %1, <8 x double> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||||
|
%5 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %x2
|
||||||
|
ret <8 x double> %6
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -355,8 +384,13 @@ define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0,
|
||||||
; X64-NEXT: # zmm2 = (zmm0 * zmm1) -/+ zmm2
|
; X64-NEXT: # zmm2 = (zmm0 * zmm1) -/+ zmm2
|
||||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
|
||||||
ret <16 x float> %res
|
%2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
|
||||||
|
%3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
|
||||||
|
%4 = shufflevector <16 x float> %1, <16 x float> %3, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
|
||||||
|
%5 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%6 = select <16 x i1> %5, <16 x float> %4, <16 x float> %x2
|
||||||
|
ret <16 x float> %6
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
|
||||||
|
@ -497,8 +531,6 @@ define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0,
|
||||||
ret <16 x float> %res
|
ret <16 x float> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -516,12 +548,13 @@ define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8
|
||||||
; X64-NEXT: # zmm2 = (zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm2 = (zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
|
||||||
ret <8 x double> %res
|
%2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %1)
|
||||||
|
%3 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %x2
|
||||||
|
ret <8 x double> %4
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -538,8 +571,11 @@ define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <1
|
||||||
; X64-NEXT: # zmm2 = (zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm2 = (zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
|
||||||
ret <16 x float> %res
|
%2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %1)
|
||||||
|
%3 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x2
|
||||||
|
ret <16 x float> %4
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||||
|
@ -706,8 +742,6 @@ define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8
|
||||||
ret <8 x double> %sel
|
ret <8 x double> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -725,12 +759,12 @@ define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8
|
||||||
; X64-NEXT: # zmm2 = (zmm0 * zmm1) + zmm2
|
; X64-NEXT: # zmm2 = (zmm0 * zmm1) + zmm2
|
||||||
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
|
||||||
ret <8 x double> %res
|
%2 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
|
||||||
|
ret <8 x double> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
|
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -746,8 +780,10 @@ define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8
|
||||||
; X64-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
|
; X64-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
|
||||||
; X64-NEXT: # zmm0 = (zmm1 * zmm0) + zmm2
|
; X64-NEXT: # zmm0 = (zmm1 * zmm0) + zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
|
||||||
ret <8 x double> %res
|
%2 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
|
||||||
|
ret <8 x double> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
|
@ -770,8 +806,6 @@ define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16
|
||||||
ret <16 x float> %sel
|
ret <16 x float> %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -788,13 +822,13 @@ define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <1
|
||||||
; X64-NEXT: # zmm2 = (zmm0 * zmm1) + zmm2
|
; X64-NEXT: # zmm2 = (zmm0 * zmm1) + zmm2
|
||||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
|
||||||
ret <16 x float> %res
|
%2 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
|
||||||
|
ret <16 x float> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
define <16 x float> @test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
|
||||||
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
|
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||||
|
@ -808,11 +842,12 @@ define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <1
|
||||||
; X64-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
|
; X64-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
|
||||||
; X64-NEXT: # zmm0 = (zmm1 * zmm0) + zmm2
|
; X64-NEXT: # zmm0 = (zmm1 * zmm0) + zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
|
||||||
ret <16 x float> %res
|
%2 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
|
||||||
|
ret <16 x float> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||||
; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
|
; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -826,8 +861,12 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8
|
||||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||||
; X64-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 0)
|
||||||
|
%4 = bitcast i8 %mask to <8 x i1>
|
||||||
|
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
|
||||||
|
ret <8 x double> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||||
|
@ -843,8 +882,12 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8
|
||||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||||
; X64-NEXT: vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x9e,0xc1]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 1)
|
||||||
|
%4 = bitcast i8 %mask to <8 x i1>
|
||||||
|
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
|
||||||
|
ret <8 x double> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||||
|
@ -860,8 +903,12 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8
|
||||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||||
; X64-NEXT: vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x9e,0xc1]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 2)
|
||||||
|
%4 = bitcast i8 %mask to <8 x i1>
|
||||||
|
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
|
||||||
|
ret <8 x double> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||||
|
@ -877,8 +924,12 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8
|
||||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||||
; X64-NEXT: vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x9e,0xc1]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 3)
|
||||||
|
%4 = bitcast i8 %mask to <8 x i1>
|
||||||
|
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
|
||||||
|
ret <8 x double> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
|
||||||
|
@ -896,8 +947,12 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0,
|
||||||
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
|
||||||
|
%4 = bitcast i8 %mask to <8 x i1>
|
||||||
|
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
|
||||||
|
ret <8 x double> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||||
|
@ -905,8 +960,10 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
|
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 0)
|
||||||
|
ret <8 x double> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||||
|
@ -914,8 +971,10 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
|
; CHECK-NEXT: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 1)
|
||||||
|
ret <8 x double> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||||
|
@ -923,8 +982,10 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
|
; CHECK-NEXT: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 2)
|
||||||
|
ret <8 x double> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||||
|
@ -932,8 +993,10 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
|
; CHECK-NEXT: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 3)
|
||||||
|
ret <8 x double> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||||
|
@ -942,8 +1005,10 @@ define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0
|
||||||
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
|
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
|
||||||
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
|
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
|
||||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
|
||||||
|
ret <8 x double> %3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
|
@ -961,12 +1026,14 @@ define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8
|
||||||
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x1
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %1, <8 x double> %2)
|
||||||
|
%4 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %x0
|
||||||
|
ret <8 x double> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -984,8 +1051,12 @@ define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <
|
||||||
; X64-NEXT: # zmm2 = -(zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm2 = -(zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x0
|
||||||
ret <8 x double> %res
|
%2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
|
||||||
|
%3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %1, <8 x double> %x1, <8 x double> %2)
|
||||||
|
%4 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %x2
|
||||||
|
ret <8 x double> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
|
@ -1002,12 +1073,14 @@ define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <1
|
||||||
; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
|
; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x1
|
||||||
ret <16 x float> %res
|
%2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
|
||||||
|
%3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %1, <16 x float> %2)
|
||||||
|
%4 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %x0
|
||||||
|
ret <16 x float> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
|
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
|
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
|
@ -1024,8 +1097,12 @@ define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <
|
||||||
; X64-NEXT: # zmm2 = -(zmm0 * zmm1) - zmm2
|
; X64-NEXT: # zmm2 = -(zmm0 * zmm1) - zmm2
|
||||||
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
|
||||||
ret <16 x float> %res
|
%2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
|
||||||
|
%3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %1, <16 x float> %x1, <16 x float> %2)
|
||||||
|
%4 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %x2
|
||||||
|
ret <16 x float> %5
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
|
||||||
|
@ -1043,8 +1120,11 @@ define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8
|
||||||
; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
|
; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) + zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) + zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
|
%1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x1
|
||||||
ret <8 x double> %res
|
%2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %1, <8 x double> %x2)
|
||||||
|
%3 = bitcast i8 %x3 to <8 x i1>
|
||||||
|
%4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %x0
|
||||||
|
ret <8 x double> %4
|
||||||
}
|
}
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
|
||||||
|
@ -1061,6 +1141,12 @@ define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <1
|
||||||
; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
|
; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
|
||||||
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) + zmm2
|
; X64-NEXT: # zmm0 = -(zmm0 * zmm1) + zmm2
|
||||||
; X64-NEXT: retq # encoding: [0xc3]
|
; X64-NEXT: retq # encoding: [0xc3]
|
||||||
%res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
|
%1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x1
|
||||||
ret <16 x float> %res
|
%2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %1, <16 x float> %x2)
|
||||||
|
%3 = bitcast i16 %x3 to <16 x i1>
|
||||||
|
%4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x0
|
||||||
|
ret <16 x float> %4
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
|
||||||
|
declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue