forked from OSchip/llvm-project
[AVX512] Switch FMA intrinsics to the masking version
This does the renaming and updates the lowering logic. Part of <rdar://problem/17688758> llvm-svn: 215664
This commit is contained in:
parent
9b4f08c729
commit
6fa675754a
|
@ -1984,13 +1984,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512">,
|
||||
def int_x86_fma_mask_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512">,
|
||||
def int_x86_fma_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
|
@ -2016,13 +2018,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512">,
|
||||
def int_x86_fma_mask_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512">,
|
||||
def int_x86_fma_mask_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
|
@ -2048,13 +2052,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512">,
|
||||
def int_x86_fma_mask_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512">,
|
||||
def int_x86_fma_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
|
@ -2080,13 +2086,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512">,
|
||||
def int_x86_fma_mask_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512">,
|
||||
def int_x86_fma_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
|
@ -2106,13 +2114,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512">,
|
||||
def int_x86_fma_mask_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512">,
|
||||
def int_x86_fma_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
|
@ -2132,13 +2142,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512">,
|
||||
def int_x86_fma_mask_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512">,
|
||||
def int_x86_fma_mask_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
|
|
|
@ -14246,43 +14246,43 @@ static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
|
|||
case Intrinsic::x86_fma_vfmadd_pd:
|
||||
case Intrinsic::x86_fma_vfmadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfmadd_pd_256:
|
||||
case Intrinsic::x86_fma_vfmadd_ps_512:
|
||||
case Intrinsic::x86_fma_vfmadd_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfmadd_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfmadd_pd_512:
|
||||
return X86ISD::FMADD;
|
||||
case Intrinsic::x86_fma_vfmsub_ps:
|
||||
case Intrinsic::x86_fma_vfmsub_pd:
|
||||
case Intrinsic::x86_fma_vfmsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfmsub_pd_256:
|
||||
case Intrinsic::x86_fma_vfmsub_ps_512:
|
||||
case Intrinsic::x86_fma_vfmsub_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfmsub_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfmsub_pd_512:
|
||||
return X86ISD::FMSUB;
|
||||
case Intrinsic::x86_fma_vfnmadd_ps:
|
||||
case Intrinsic::x86_fma_vfnmadd_pd:
|
||||
case Intrinsic::x86_fma_vfnmadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfnmadd_pd_256:
|
||||
case Intrinsic::x86_fma_vfnmadd_ps_512:
|
||||
case Intrinsic::x86_fma_vfnmadd_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
|
||||
return X86ISD::FNMADD;
|
||||
case Intrinsic::x86_fma_vfnmsub_ps:
|
||||
case Intrinsic::x86_fma_vfnmsub_pd:
|
||||
case Intrinsic::x86_fma_vfnmsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfnmsub_pd_256:
|
||||
case Intrinsic::x86_fma_vfnmsub_ps_512:
|
||||
case Intrinsic::x86_fma_vfnmsub_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
|
||||
return X86ISD::FNMSUB;
|
||||
case Intrinsic::x86_fma_vfmaddsub_ps:
|
||||
case Intrinsic::x86_fma_vfmaddsub_pd:
|
||||
case Intrinsic::x86_fma_vfmaddsub_ps_256:
|
||||
case Intrinsic::x86_fma_vfmaddsub_pd_256:
|
||||
case Intrinsic::x86_fma_vfmaddsub_ps_512:
|
||||
case Intrinsic::x86_fma_vfmaddsub_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
|
||||
return X86ISD::FMADDSUB;
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd:
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd_256:
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps_512:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
|
||||
return X86ISD::FMSUBADD;
|
||||
}
|
||||
}
|
||||
|
@ -14919,6 +14919,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
|
||||
return DAG.getNode(Opcode, dl, VTs, NewOps);
|
||||
}
|
||||
|
||||
case Intrinsic::x86_fma_mask_vfmadd_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfmadd_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfmsub_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfmsub_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
|
||||
case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
|
||||
case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
|
||||
auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
|
||||
if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
|
||||
return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
|
||||
dl, Op.getValueType(),
|
||||
Op.getOperand(1),
|
||||
Op.getOperand(2),
|
||||
Op.getOperand(3)),
|
||||
Op.getOperand(4), Op.getOperand(1), DAG);
|
||||
else
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
case Intrinsic::x86_fma_vfmadd_ps:
|
||||
case Intrinsic::x86_fma_vfmadd_pd:
|
||||
case Intrinsic::x86_fma_vfmsub_ps:
|
||||
|
@ -14943,18 +14968,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
|||
case Intrinsic::x86_fma_vfmaddsub_pd_256:
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps_256:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd_256:
|
||||
case Intrinsic::x86_fma_vfmadd_ps_512:
|
||||
case Intrinsic::x86_fma_vfmadd_pd_512:
|
||||
case Intrinsic::x86_fma_vfmsub_ps_512:
|
||||
case Intrinsic::x86_fma_vfmsub_pd_512:
|
||||
case Intrinsic::x86_fma_vfnmadd_ps_512:
|
||||
case Intrinsic::x86_fma_vfnmadd_pd_512:
|
||||
case Intrinsic::x86_fma_vfnmsub_ps_512:
|
||||
case Intrinsic::x86_fma_vfnmsub_pd_512:
|
||||
case Intrinsic::x86_fma_vfmaddsub_ps_512:
|
||||
case Intrinsic::x86_fma_vfmaddsub_pd_512:
|
||||
case Intrinsic::x86_fma_vfmsubadd_ps_512:
|
||||
case Intrinsic::x86_fma_vfmsubadd_pd_512:
|
||||
return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
|
||||
}
|
||||
|
|
|
@ -1,97 +1,99 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
|
||||
|
||||
define <16 x float> @test_x86_vfmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmadd_ps_z
|
||||
; CHECK: vfmadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_vfmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmadd_pd_z
|
||||
; CHECK: vfmadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
|
||||
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
|
||||
|
||||
define <16 x float> @test_x86_vfmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubps_z
|
||||
; CHECK: vfmsub213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubpd_z
|
||||
; CHECK: vfmsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_ps_z
|
||||
; CHECK: vfnmadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmadd_pd_z
|
||||
; CHECK: vfnmadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubps_z
|
||||
; CHECK: vfnmsub213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfnmsubpd_z
|
||||
; CHECK: vfnmsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmaddsubps_z
|
||||
; CHECK: vfmaddsub213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
|
||||
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmaddsubpd_z
|
||||
; CHECK: vfmaddsub213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubaddps_z
|
||||
; CHECK: vfmsubadd213ps %zmm
|
||||
%res = call <16 x float> @llvm.x86.fma.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
|
||||
%res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
|
||||
ret <16 x float> %res
|
||||
}
|
||||
declare <16 x float> @llvm.x86.fma.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
|
||||
declare <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
|
||||
|
||||
define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
|
||||
; CHECK-LABEL: test_x86_vfmsubaddpd_z
|
||||
; CHECK: vfmsubadd213pd %zmm
|
||||
%res = call <8 x double> @llvm.x86.fma.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
|
||||
%res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.fma.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
|
||||
declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
|
Loading…
Reference in New Issue