forked from OSchip/llvm-project
[X86] Remove and autoupgrade a bunch of FMA intrinsics that are no longer used by clang.
llvm-svn: 332146
This commit is contained in:
parent
669375814c
commit
a17d627abb
|
@ -2200,78 +2200,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_fma_vfmsub_ss : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_sd : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_ps : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_pd : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_ps_256 : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsub_pd_256 : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_ss : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_sd : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_ps : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_pd : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_ps_256 : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmadd_pd_256 : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_ss : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_sd : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_ps : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_pd : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_ps_256 : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfnmsub_pd_256 : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
|
@ -2290,22 +2218,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_ps : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_pd : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_ps_256 : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_fma_vfmsubadd_pd_256 : // TODO: remove this intrinsic
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vfmadd_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
|
||||
|
|
|
@ -87,6 +87,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
|||
if (Name=="ssse3.pabs.b.128" || // Added in 6.0
|
||||
Name=="ssse3.pabs.w.128" || // Added in 6.0
|
||||
Name=="ssse3.pabs.d.128" || // Added in 6.0
|
||||
Name.startswith("fma.vfmsub.") || // Added in 7.0
|
||||
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
|
||||
Name.startswith("fma.vfnmadd.") || // Added in 7.0
|
||||
Name.startswith("fma.vfnmsub.") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
|
||||
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
|
||||
Name.startswith("avx512.kunpck") || //added in 6.0
|
||||
|
@ -2360,6 +2364,85 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
}
|
||||
} else if (IsX86 && Name.startswith("fma.vfmsub")) {
|
||||
// Handle FMSUB and FMSUBADD.
|
||||
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
|
||||
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
|
||||
Intrinsic::ID IID;
|
||||
if (Name[10] == '.' && Name[11] == 'p') {
|
||||
// Packed FMSUB
|
||||
if (VecWidth == 128 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmadd_ps;
|
||||
else if (VecWidth == 128 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmadd_pd;
|
||||
else if (VecWidth == 256 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmadd_ps_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmadd_pd_256;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
} else if (Name[10] == '.' && Name[11] == 's') {
|
||||
// Scalar FMSUB
|
||||
if (EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmadd_ss;
|
||||
else if (EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmadd_sd;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
} else {
|
||||
// FMSUBADD
|
||||
if (VecWidth == 128 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmaddsub_ps;
|
||||
else if (VecWidth == 128 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmaddsub_pd;
|
||||
else if (VecWidth == 256 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
}
|
||||
Value *Arg2 = Builder.CreateFNeg(CI->getArgOperand(2));
|
||||
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), Arg2 };
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
|
||||
Ops);
|
||||
} else if (IsX86 && (Name.startswith("fma.vfnmadd.") ||
|
||||
Name.startswith("fma.vfnmsub."))) {
|
||||
Value *Arg0 = CI->getArgOperand(0);
|
||||
Value *Arg1 = CI->getArgOperand(1);
|
||||
Value *Arg2 = CI->getArgOperand(2);
|
||||
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
|
||||
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
|
||||
Intrinsic::ID IID;
|
||||
if (Name[12] == 'p') {
|
||||
// Packed FNMADD/FNMSUB
|
||||
Arg0 = Builder.CreateFNeg(Arg0);
|
||||
if (VecWidth == 128 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmadd_ps;
|
||||
else if (VecWidth == 128 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmadd_pd;
|
||||
else if (VecWidth == 256 && EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmadd_ps_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmadd_pd_256;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
} else {
|
||||
// Scalar FNMADD/FNMSUB
|
||||
Arg1 = Builder.CreateFNeg(Arg1); // Arg0 is passthru so invert Arg1.
|
||||
if (EltWidth == 32)
|
||||
IID = Intrinsic::x86_fma_vfmadd_ss;
|
||||
else if (EltWidth == 64)
|
||||
IID = Intrinsic::x86_fma_vfmadd_sd;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
}
|
||||
// Invert for FNMSUB.
|
||||
if (Name[8] == 's')
|
||||
Arg2 = Builder.CreateFNeg(Arg2);
|
||||
Value *Ops[] = { Arg0, Arg1, Arg2 };
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
|
||||
Ops);
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.") &&
|
||||
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
|
||||
// Rep will be updated by the call in the condition.
|
||||
|
|
|
@ -1536,28 +1536,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsub_pd, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsub_sd, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsub_ss, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfmsubadd_ps_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmadd_pd, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmadd_sd, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmadd_ss, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmsub_sd, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
|
||||
X86_INTRINSIC_DATA(fma_vfnmsub_ss, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
|
||||
X86_INTRINSIC_DATA(fma4_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
|
||||
X86_INTRINSIC_DATA(fma4_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
|
||||
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
|
||||
|
|
|
@ -2402,13 +2402,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
|
||||
case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
|
||||
case Intrinsic::x86_fma_vfmadd_ss:
|
||||
case Intrinsic::x86_fma_vfmsub_ss:
|
||||
case Intrinsic::x86_fma_vfnmadd_ss:
|
||||
case Intrinsic::x86_fma_vfnmsub_ss:
|
||||
case Intrinsic::x86_fma_vfmadd_sd:
|
||||
case Intrinsic::x86_fma_vfmsub_sd:
|
||||
case Intrinsic::x86_fma_vfnmadd_sd:
|
||||
case Intrinsic::x86_fma_vfnmsub_sd:
|
||||
case Intrinsic::x86_sse_cmp_ss:
|
||||
case Intrinsic::x86_sse_min_ss:
|
||||
case Intrinsic::x86_sse_max_ss:
|
||||
|
|
|
@ -1387,13 +1387,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
|||
case Intrinsic::x86_avx512_mask_max_sd_round:
|
||||
case Intrinsic::x86_avx512_mask_min_sd_round:
|
||||
case Intrinsic::x86_fma_vfmadd_ss:
|
||||
case Intrinsic::x86_fma_vfmsub_ss:
|
||||
case Intrinsic::x86_fma_vfnmadd_ss:
|
||||
case Intrinsic::x86_fma_vfnmsub_ss:
|
||||
case Intrinsic::x86_fma_vfmadd_sd:
|
||||
case Intrinsic::x86_fma_vfmsub_sd:
|
||||
case Intrinsic::x86_fma_vfnmadd_sd:
|
||||
case Intrinsic::x86_fma_vfnmsub_sd:
|
||||
case Intrinsic::x86_avx512_mask_vfmadd_ss:
|
||||
case Intrinsic::x86_avx512_mask_vfmadd_sd:
|
||||
case Intrinsic::x86_avx512_maskz_vfmadd_ss:
|
||||
|
|
|
@ -43,16 +43,16 @@ declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x
|
|||
define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; X32-LABEL: test3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
|
||||
; X32-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
|
||||
; X32-NEXT: vxorps %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
|
||||
; X32-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
|
||||
; X32-NEXT: vxorps %xmm3, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test3:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
|
||||
; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
|
||||
; X64-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
|
||||
; X64-NEXT: vxorps %xmm3, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
|
||||
|
@ -81,16 +81,12 @@ entry:
|
|||
define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
; X32-LABEL: test5:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm3
|
||||
; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2
|
||||
; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
|
||||
; X32-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test5:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
|
||||
; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2
|
||||
; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
|
||||
; X64-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
|
||||
|
|
|
@ -89,18 +89,10 @@ entry:
|
|||
}
|
||||
|
||||
define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
; SKX-LABEL: test8:
|
||||
; SKX: # %bb.0: # %entry
|
||||
; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm2, %ymm2
|
||||
; SKX-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; KNL-LABEL: test8:
|
||||
; KNL: # %bb.0: # %entry
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
|
||||
; KNL-NEXT: vxorps %ymm3, %ymm2, %ymm2
|
||||
; KNL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
; CHECK-LABEL: test8:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
|
||||
%0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
|
||||
|
|
|
@ -80,236 +80,3 @@ define double @test_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %
|
|||
ret double %3
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <4 x float> @test_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
|
||||
%5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
|
||||
%6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
|
||||
%res = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define float @test_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
ret float %5
|
||||
}
|
||||
|
||||
define float @test_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_ss_1(
|
||||
; CHECK-NEXT: ret float 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
ret float %5
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x double> @test_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_sd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
|
||||
%res = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define double @test_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 0
|
||||
ret double %3
|
||||
}
|
||||
|
||||
define double @test_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfmsub_sd_1(
|
||||
; CHECK-NEXT: ret double 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 1
|
||||
ret double %3
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <4 x float> @test_vfnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
|
||||
%5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
|
||||
%6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
|
||||
%res = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define float @test_vfnmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
ret float %5
|
||||
}
|
||||
|
||||
define float @test_vfnmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_ss_1(
|
||||
; CHECK-NEXT: ret float 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
ret float %5
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x double> @test_vfnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_sd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
|
||||
%res = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define double @test_vfnmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 0
|
||||
ret double %3
|
||||
}
|
||||
|
||||
define double @test_vfnmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmadd_sd_1(
|
||||
; CHECK-NEXT: ret double 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 1
|
||||
ret double %3
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
define <4 x float> @test_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
|
||||
%5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
|
||||
%6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
|
||||
%res = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define float @test_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret float [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
ret float %5
|
||||
}
|
||||
|
||||
define float @test_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_ss_1(
|
||||
; CHECK-NEXT: ret float 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
ret float %5
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
|
||||
|
||||
define <2 x double> @test_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_sd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
|
||||
%2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
|
||||
%res = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define double @test_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 0
|
||||
ret double %3
|
||||
}
|
||||
|
||||
define double @test_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: @test_vfnmsub_sd_1(
|
||||
; CHECK-NEXT: ret double 1.000000e+00
|
||||
;
|
||||
%1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
|
||||
%3 = extractelement <2 x double> %2, i32 1
|
||||
ret double %3
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue