[X86] X86InstComments - add FMA4 comments

These typically match the FMA3 equivalents, although the multiply operands sometimes appear flipped relative to the FMA3 comments, since FMA3 has 132/213/231 permute variants of each instruction while FMA4 has a single fixed operand order.
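For example, FMA4 instructions now receive the same style of comment as their FMA3 counterparts, as seen in the updated tests:

vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # xmm0 = (xmm0 * xmm1) + xmm2
vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # ymm0 = -(ymm0 * ymm1) - ymm2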
Simon Pilgrim 2020-02-08 17:01:04 +00:00
parent 10417ad2e4
commit 4aa7b9cc96
14 changed files with 473 additions and 268 deletions


@ -199,6 +199,40 @@ using namespace llvm;
CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
#define CASE_FMA4(Inst, suf) \
CASE_AVX_INS_COMMON(Inst, 4, suf) \
CASE_AVX_INS_COMMON(Inst, 4Y, suf)
#define CASE_FMA4_PACKED_RR(Inst) \
CASE_FMA4(Inst##PD, rr) \
CASE_FMA4(Inst##PS, rr)
#define CASE_FMA4_PACKED_RM(Inst) \
CASE_FMA4(Inst##PD, rm) \
CASE_FMA4(Inst##PS, rm)
#define CASE_FMA4_PACKED_MR(Inst) \
CASE_FMA4(Inst##PD, mr) \
CASE_FMA4(Inst##PS, mr)
#define CASE_FMA4_SCALAR_RR(Inst) \
CASE_AVX_INS_COMMON(Inst##SD4, , rr) \
CASE_AVX_INS_COMMON(Inst##SS4, , rr) \
CASE_AVX_INS_COMMON(Inst##SD4, , rr_Int) \
CASE_AVX_INS_COMMON(Inst##SS4, , rr_Int)
#define CASE_FMA4_SCALAR_RM(Inst) \
CASE_AVX_INS_COMMON(Inst##SD4, , rm) \
CASE_AVX_INS_COMMON(Inst##SS4, , rm) \
CASE_AVX_INS_COMMON(Inst##SD4, , rm_Int) \
CASE_AVX_INS_COMMON(Inst##SS4, , rm_Int)
#define CASE_FMA4_SCALAR_MR(Inst) \
CASE_AVX_INS_COMMON(Inst##SD4, , mr) \
CASE_AVX_INS_COMMON(Inst##SS4, , mr) \
CASE_AVX_INS_COMMON(Inst##SD4, , mr_Int) \
CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
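// For illustration: assuming the CASE_AVX_INS_COMMON(Inst, Suffix, src) helper
// defined earlier in this file expands to "case X86::V##Inst##Suffix##src:",
// CASE_FMA4_PACKED_RR(FMADD) produces the case labels for both the 128-bit and
// 256-bit register-register forms:
//   case X86::VFMADDPD4rr: case X86::VFMADDPD4Yrr:
//   case X86::VFMADDPS4rr: case X86::VFMADDPS4Yrr: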
static unsigned getVectorRegSize(unsigned RegNo) {
if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
return 512;
@ -247,14 +281,14 @@ static void printMasking(raw_ostream &OS, const MCInst *MI,
OS << " {z}";
}
static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
static bool printFMAComments(const MCInst *MI, raw_ostream &OS) {
const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
unsigned NumOperands = MI->getNumOperands();
bool RegForm = false;
bool Negate = false;
StringRef AccStr = "+";
// The operands for FMA instructions without rounding fall into two forms.
// The operands for FMA3 instructions without rounding fall into two forms:
// dest, src1, src2, src3
// dest, src1, mask, src2, src3
// Where src3 is either a register or 5 memory address operands. So to find
@ -262,9 +296,118 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
// index from the end by taking into account memory vs register form when
// finding src2.
// The operands for FMA4 instructions:
// dest, src1, src2, src3
// Where src2 OR src3 are either a register or 5 memory address operands. So
// to find dest and src1 we can index from the front, src2 (reg/mem) follows
// and then src3 (reg) will be at the end.
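// For example, for the rm form VFMADDPS4rm the operands are dest(0), src1(1),
// src2(2, a register) and then the five src3 memory address operands, so the
// rm cases below leave AccName unset (printed as "mem"). For the mr form
// VFMADDPS4mr, src2 is the five memory operands and src3 is the trailing
// register, so the mr cases read the accumulator from
// getOperand(NumOperands - 1).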
switch (MI->getOpcode()) {
default:
return false;
CASE_FMA4_PACKED_RR(FMADD)
CASE_FMA4_SCALAR_RR(FMADD)
RegForm = true;
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_FMA4_PACKED_RM(FMADD)
CASE_FMA4_SCALAR_RM(FMADD)
Mul2Name = getRegName(MI->getOperand(2).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
break;
CASE_FMA4_PACKED_MR(FMADD)
CASE_FMA4_SCALAR_MR(FMADD)
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
break;
CASE_FMA4_PACKED_RR(FMSUB)
CASE_FMA4_SCALAR_RR(FMSUB)
RegForm = true;
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_FMA4_PACKED_RM(FMSUB)
CASE_FMA4_SCALAR_RM(FMSUB)
Mul2Name = getRegName(MI->getOperand(2).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
AccStr = "-";
break;
CASE_FMA4_PACKED_MR(FMSUB)
CASE_FMA4_SCALAR_MR(FMSUB)
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
AccStr = "-";
break;
CASE_FMA4_PACKED_RR(FNMADD)
CASE_FMA4_SCALAR_RR(FNMADD)
RegForm = true;
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_FMA4_PACKED_RM(FNMADD)
CASE_FMA4_SCALAR_RM(FNMADD)
Mul2Name = getRegName(MI->getOperand(2).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
Negate = true;
break;
CASE_FMA4_PACKED_MR(FNMADD)
CASE_FMA4_SCALAR_MR(FNMADD)
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
Negate = true;
break;
CASE_FMA4_PACKED_RR(FNMSUB)
CASE_FMA4_SCALAR_RR(FNMSUB)
RegForm = true;
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_FMA4_PACKED_RM(FNMSUB)
CASE_FMA4_SCALAR_RM(FNMSUB)
Mul2Name = getRegName(MI->getOperand(2).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
AccStr = "-";
Negate = true;
break;
CASE_FMA4_PACKED_MR(FNMSUB)
CASE_FMA4_SCALAR_MR(FNMSUB)
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
AccStr = "-";
Negate = true;
break;
CASE_FMA4_PACKED_RR(FMADDSUB)
RegForm = true;
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_FMA4_PACKED_RM(FMADDSUB)
Mul2Name = getRegName(MI->getOperand(2).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
AccStr = "+/-";
break;
CASE_FMA4_PACKED_MR(FMADDSUB)
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
AccStr = "+/-";
break;
CASE_FMA4_PACKED_RR(FMSUBADD)
RegForm = true;
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_FMA4_PACKED_RM(FMSUBADD)
Mul2Name = getRegName(MI->getOperand(2).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
AccStr = "-/+";
break;
CASE_FMA4_PACKED_MR(FMSUBADD)
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
Mul1Name = getRegName(MI->getOperand(1).getReg());
AccStr = "-/+";
break;
CASE_FMA_PACKED_REG(FMADD132)
CASE_FMA_SCALAR_REG(FMADD132)
Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
@ -504,7 +647,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
unsigned NumOperands = MI->getNumOperands();
bool RegForm = false;
if (printFMA3Comments(MI, OS))
if (printFMAComments(MI, OS))
return true;
switch (MI->getOpcode()) {

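For reference, a minimal sketch of how the gathered names plausibly become the printed comment; the tail of printFMAComments sits outside this hunk, so the exact code below is an assumption, but the resulting "dst = (a * b) op c" format matches the comments checked in the tests that follow:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

// Combine the operand names collected by the switch above into a comment such
// as "xmm0 = -(xmm0 * xmm1) + xmm2". A null name stands for the memory
// reference of an rm/mr form.
static void printFMAComment(llvm::raw_ostream &OS, llvm::StringRef DestName,
                            const char *Mul1Name, const char *Mul2Name,
                            const char *AccName, llvm::StringRef AccStr,
                            bool Negate) {
  if (!Mul1Name) Mul1Name = "mem";
  if (!Mul2Name) Mul2Name = "mem";
  if (!AccName)  AccName = "mem";
  OS << DestName << " = ";
  if (Negate)
    OS << '-';
  OS << '(' << Mul1Name << " * " << Mul2Name << ") " << AccStr << ' ' << AccName;
}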

@ -5,7 +5,7 @@
define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; CHECK-LABEL: fmafunc:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retl
;
; CHECK-NOFMA-LABEL: fmafunc:


@ -12,7 +12,7 @@ define float @test_fneg_fma_subx_y_negz_f32(float %w, float %x, float %y, float
; FMA4-LABEL: test_fneg_fma_subx_y_negz_f32:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddss %xmm3, %xmm2, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm3
; FMA4-NEXT: retq
entry:
%subx = fsub nsz float %w, %x
@ -32,7 +32,7 @@ define float @test_fneg_fma_x_suby_negz_f32(float %w, float %x, float %y, float
; FMA4-LABEL: test_fneg_fma_x_suby_negz_f32:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
; FMA4-NEXT: retq
entry:
%suby = fsub nsz float %w, %y
@ -54,7 +54,7 @@ define float @test_fneg_fma_subx_suby_negz_f32(float %w, float %x, float %y, flo
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm1
; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
; FMA4-NEXT: retq
entry:
%subx = fsub nsz float %w, %x
@ -75,7 +75,7 @@ define float @test_fneg_fma_subx_negy_negz_f32(float %w, float %x, float %y, flo
; FMA4-LABEL: test_fneg_fma_subx_negy_negz_f32:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddss %xmm3, %xmm2, %xmm0, %xmm0
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm3
; FMA4-NEXT: retq
entry:
%subx = fsub nsz float %w, %x
@ -96,7 +96,7 @@ define <4 x float> @test_fma_rcp_fneg_v4f32(<4 x float> %x, <4 x float> %y, <4 x
; FMA4-LABEL: test_fma_rcp_fneg_v4f32:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vrcpps %xmm2, %xmm2
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%0 = fneg <4 x float> %z
@ -118,7 +118,7 @@ define float @negated_constant(float %x) {
; FMA4-LABEL: negated_constant:
; FMA4: # %bb.0:
; FMA4-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
; FMA4-NEXT: vfnmsubss %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1
; FMA4-NEXT: retq
%m = fmul float %x, 42.0
%fma = call nsz float @llvm.fma.f32(float %x, float -42.0, float %m)


@ -8,7 +8,7 @@ define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_ss:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
@ -18,7 +18,7 @@ define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_ss:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -28,7 +28,7 @@ define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_ss:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT: vfmaddss (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -39,7 +39,7 @@ define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
@ -49,7 +49,7 @@ define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -59,7 +59,7 @@ define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %xmm0
; FMA4-NEXT: vfmaddps (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -70,7 +70,7 @@ define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
ret <8 x float> %res
@ -80,7 +80,7 @@ define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
@ -90,7 +90,7 @@ define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %ymm0
; FMA4-NEXT: vfmaddps (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
@ -101,7 +101,7 @@ define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmadd_baa_sd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
@ -111,7 +111,7 @@ define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmadd_aba_sd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -121,7 +121,7 @@ define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmadd_bba_sd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT: vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -132,7 +132,7 @@ define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmadd_baa_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
@ -142,7 +142,7 @@ define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmadd_aba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -152,7 +152,7 @@ define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmadd_bba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %xmm0
; FMA4-NEXT: vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -163,7 +163,7 @@ define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #
; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
ret <4 x double> %res
@ -173,7 +173,7 @@ define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #
; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
@ -183,7 +183,7 @@ define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #
; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %ymm0
; FMA4-NEXT: vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
@ -194,7 +194,7 @@ define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_baa_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
@ -204,7 +204,7 @@ define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_aba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -214,7 +214,7 @@ define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmadd_bba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %xmm0
; FMA4-NEXT: vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -225,7 +225,7 @@ define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0
; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
ret <8 x float> %res
@ -235,7 +235,7 @@ define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0
; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
@ -245,7 +245,7 @@ define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0
; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %ymm0
; FMA4-NEXT: vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
@ -256,7 +256,7 @@ define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fnmadd_baa_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
@ -266,7 +266,7 @@ define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fnmadd_aba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -276,7 +276,7 @@ define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fnmadd_bba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %xmm0
; FMA4-NEXT: vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -287,7 +287,7 @@ define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b)
; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
ret <4 x double> %res
@ -297,7 +297,7 @@ define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b)
; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
@ -307,7 +307,7 @@ define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b)
; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %ymm0
; FMA4-NEXT: vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
@ -318,7 +318,7 @@ define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_baa_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
@ -328,7 +328,7 @@ define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_aba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -338,7 +338,7 @@ define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_bba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %xmm0
; FMA4-NEXT: vfmsubps (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -349,7 +349,7 @@ define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
ret <8 x float> %res
@ -359,7 +359,7 @@ define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
@ -369,7 +369,7 @@ define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %ymm0
; FMA4-NEXT: vfmsubps (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
@ -380,7 +380,7 @@ define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmsub_baa_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
@ -390,7 +390,7 @@ define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmsub_aba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -400,7 +400,7 @@ define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fmsub_bba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %xmm0
; FMA4-NEXT: vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -411,7 +411,7 @@ define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #
; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
ret <4 x double> %res
@ -421,7 +421,7 @@ define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #
; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
@ -431,7 +431,7 @@ define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #
; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %ymm0
; FMA4-NEXT: vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
@ -442,7 +442,7 @@ define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_baa_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
ret <4 x float> %res
@ -452,7 +452,7 @@ define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_aba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %xmm0
; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -462,7 +462,7 @@ define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA4-LABEL: test_x86_fnmsub_bba_ps:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %xmm0
; FMA4-NEXT: vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA4-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -473,7 +473,7 @@ define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0
; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
ret <8 x float> %res
@ -483,7 +483,7 @@ define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0
; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rcx), %ymm0
; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
@ -493,7 +493,7 @@ define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0
; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovaps (%rdx), %ymm0
; FMA4-NEXT: vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA4-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
ret <8 x float> %res
@ -504,7 +504,7 @@ define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fnmsub_baa_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
ret <2 x double> %res
@ -514,7 +514,7 @@ define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fnmsub_aba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %xmm0
; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -524,7 +524,7 @@ define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0
; FMA4-LABEL: test_x86_fnmsub_bba_pd:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %xmm0
; FMA4-NEXT: vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA4-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -535,7 +535,7 @@ define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b)
; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
ret <4 x double> %res
@ -545,7 +545,7 @@ define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b)
; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rcx), %ymm0
; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res
@ -555,7 +555,7 @@ define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b)
; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vmovapd (%rdx), %ymm0
; FMA4-NEXT: vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA4-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
ret <4 x double> %res


@ -9,7 +9,7 @@ declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b,
define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
@ -20,7 +20,7 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i)
@ -30,7 +30,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c)
@ -40,7 +40,7 @@ define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
define <4 x float> @test4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c)
@ -50,7 +50,7 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
%sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
@ -61,7 +61,7 @@ define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
@ -72,7 +72,7 @@ define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
define <2 x double> @test7(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i)
@ -82,7 +82,7 @@ define <2 x double> @test7(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %sub.i, <2 x double> %c)
@ -92,7 +92,7 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c)
@ -102,7 +102,7 @@ define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
%sub.i.2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c


@ -6,6 +6,7 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
@ -16,6 +17,7 @@ define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
@ -26,6 +28,7 @@ define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1,
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
@ -36,6 +39,7 @@ define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
@ -47,6 +51,7 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
@ -57,6 +62,7 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
@ -67,6 +73,7 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1,
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
@ -77,6 +84,7 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
@ -88,6 +96,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
@ -98,6 +107,7 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
@ -108,6 +118,7 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
@ -118,6 +129,7 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double>
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
@ -129,6 +141,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
@ -139,6 +152,7 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
@ -149,6 +163,7 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
@ -159,6 +174,7 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double>
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
@ -170,6 +186,7 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1,
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
@ -180,6 +197,7 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
@ -190,6 +208,7 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
@ -200,6 +219,7 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
@ -211,6 +231,7 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1,
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
@ -221,6 +242,7 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
@ -231,6 +253,7 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
@ -241,6 +264,7 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res


@ -7,6 +7,7 @@ define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
@ -16,6 +17,7 @@ define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
ret <4 x float> %res
@ -26,6 +28,7 @@ define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
@ -35,6 +38,7 @@ define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double>
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
ret <2 x double> %res
@ -45,6 +49,7 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %1
@ -54,6 +59,7 @@ define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %1
@ -63,6 +69,7 @@ define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1,
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %1
@ -72,6 +79,7 @@ define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %1
@ -82,6 +90,7 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
@ -92,6 +101,7 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
@ -102,6 +112,7 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1,
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
@ -112,6 +123,7 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
@ -123,6 +135,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
@ -133,6 +146,7 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
@ -143,6 +157,7 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
@ -153,6 +168,7 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double>
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
@ -164,6 +180,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
@ -175,6 +192,7 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1,
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
@ -186,6 +204,7 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
@ -197,6 +216,7 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double>
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
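A note on how these checks encode the sign variants: LLVM IR has only the plain `llvm.fma` intrinsic, so the tests negate operands with `fsub -0.0, %x` beforehand, and the printed comment reports the resulting sign pattern. A minimal scalar model of the four shapes, as a sketch in plain C++ with `std::fma` (not the printer's actual code):

#include <cmath>

// Scalar models of the four comment shapes. The tests build the
// negations with 'fsub -0.0, %x' before calling llvm.fma.
double fmadd(double a, double b, double c)  { return std::fma(a, b, c);   } // (a * b) + c
double fmsub(double a, double b, double c)  { return std::fma(a, b, -c);  } // (a * b) - c
double fnmadd(double a, double b, double c) { return std::fma(-a, b, c);  } // -(a * b) + c
double fnmsub(double a, double b, double c) { return std::fma(-a, b, -c); } // -(a * b) - c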
@ -209,6 +229,7 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1,
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
@ -221,6 +242,7 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
@ -233,6 +255,7 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
@ -245,6 +268,7 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
@ -258,6 +282,7 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1,
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
@ -270,6 +295,7 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
@ -282,6 +308,7 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
@ -294,6 +321,7 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: retq # encoding: [0xc3]
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
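The `+/-` and `-/+` comments abbreviate x86's alternating addsub semantics: vfmaddsub subtracts in even lanes and adds in odd lanes, and vfmsubadd does the reverse. A lane-wise sketch (illustrative C++ only, not the instruction definition):

#include <cmath>
#include <cstddef>

// (x * y) +/- z : even lanes subtract, odd lanes add (vfmaddsub).
void fmaddsub(const double *x, const double *y, const double *z,
              double *r, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    r[i] = (i % 2 == 0) ? std::fma(x[i], y[i], -z[i])
                        : std::fma(x[i], y[i], z[i]);
}

// (x * y) -/+ z : even lanes add, odd lanes subtract (vfmsubadd).
void fmsubadd(const double *x, const double *y, const double *z,
              double *r, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    r[i] = (i % 2 == 0) ? std::fma(x[i], y[i], z[i])
                        : std::fma(x[i], y[i], -z[i]);
}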


@ -7,6 +7,7 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x floa
; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0x07,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load float , float *%a2
%y = insertelement <4 x float> undef, float %x, i32 0
@ -17,6 +18,7 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a
; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x6a,0x07,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load float , float *%a1
%y = insertelement <4 x float> undef, float %x, i32 0
@ -30,6 +32,7 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x do
; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0x07,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load double , double *%a2
%y = insertelement <2 x double> undef, double %x, i32 0
@ -40,6 +43,7 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double*
; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x6b,0x07,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load double , double *%a1
%y = insertelement <2 x double> undef, double %x, i32 0
@ -51,6 +55,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float
; CHECK-LABEL: test_x86_fma_vfmadd_ps_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0x07,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <4 x float>, <4 x float>* %a2
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x)
@ -60,6 +65,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x floa
; CHECK-LABEL: test_x86_fma_vfmadd_ps_load2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x68,0x07,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <4 x float>, <4 x float>* %a1
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2)
@ -73,6 +79,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x flo
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0f]
; CHECK-NEXT: vfmaddps %xmm0, (%rsi), %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x68,0x06,0x00]
; CHECK-NEXT: # xmm0 = (xmm1 * mem) + xmm0
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <4 x float>, <4 x float>* %a0
%y = load <4 x float>, <4 x float>* %a1
@ -84,6 +91,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x dou
; CHECK-LABEL: test_x86_fma_vfmadd_pd_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0x07,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <2 x double>, <2 x double>* %a2
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x)
@ -93,6 +101,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x do
; CHECK-LABEL: test_x86_fma_vfmadd_pd_load2:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x69,0x07,0x10]
; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <2 x double>, <2 x double>* %a1
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2)
@ -106,6 +115,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x d
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd (%rdi), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0f]
; CHECK-NEXT: vfmaddpd %xmm0, (%rsi), %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x69,0x06,0x00]
; CHECK-NEXT: # xmm0 = (xmm1 * mem) + xmm0
; CHECK-NEXT: retq # encoding: [0xc3]
%x = load <2 x double>, <2 x double>* %a0
%y = load <2 x double>, <2 x double>* %a1
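FMA4's four-operand encoding lets a load fold into either the second multiplicand or the addend, and the comment prints `mem` in whichever slot the memory operand occupies. A rough scalar model of the two folds (the helper names here are illustrative, not LLVM API):

#include <cmath>

// Memory folded into the addend slot:
//   vfmaddss (%rdi), %xmm1, %xmm0, %xmm0  ->  xmm0 = (xmm0 * xmm1) + mem
double fmadd_mem_addend(double src1, double src2, const double *mem) {
  return std::fma(src1, src2, *mem);
}

// Memory folded into the multiplicand slot:
//   vfmaddss %xmm1, (%rdi), %xmm0, %xmm0  ->  xmm0 = (xmm0 * mem) + xmm1
double fmadd_mem_mul(double src1, const double *mem, double src3) {
  return std::fma(src1, *mem, src3);
}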


@ -10,7 +10,7 @@ define void @fmadd_aab_ss(float* %a, float* %b) {
; CHECK-LABEL: fmadd_aab_ss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vfmaddss (%rsi), %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; CHECK-NEXT: vmovss %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load float, float* %a
@ -36,7 +36,7 @@ define void @fmadd_aba_ss(float* %a, float* %b) {
; CHECK-LABEL: fmadd_aba_ss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vfmaddss %xmm0, (%rsi), %xmm0, %xmm0
; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; CHECK-NEXT: vmovss %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load float, float* %a
@ -62,7 +62,7 @@ define void @fmadd_aab_sd(double* %a, double* %b) {
; CHECK-LABEL: fmadd_aab_sd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; CHECK-NEXT: vmovsd %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load double, double* %a
@ -84,7 +84,7 @@ define void @fmadd_aba_sd(double* %a, double* %b) {
; CHECK-LABEL: fmadd_aba_sd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0
; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; CHECK-NEXT: vmovsd %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load double, double* %a
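These scalar tests reuse one value in two operand slots, computing (a * a) + b and (a * b) + a; the regenerated checks match through the `{{.*#+}}` FileCheck pattern, which appears to skip the raw operands and key on the printed comment instead. Scalar models of the two shapes (sketch only):

#include <cmath>

// fmadd_aab: xmm0 = (xmm0 * xmm0) + mem  ->  (a * a) + b
float fmadd_aab(float a, float b) { return std::fma(a, a, b); }

// fmadd_aba: xmm0 = (xmm0 * mem) + xmm0  ->  (a * b) + a
float fmadd_aba(float a, float b) { return std::fma(a, b, a); }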


@ -20,7 +20,7 @@ define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
;
; FMA4-LABEL: test_f32_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fmadd:
@ -40,7 +40,7 @@ define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float
;
; FMA4-LABEL: test_4f32_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmadd:
@ -60,7 +60,7 @@ define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float
;
; FMA4-LABEL: test_8f32_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fmadd:
@ -80,7 +80,7 @@ define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
;
; FMA4-LABEL: test_f64_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fmadd:
@ -100,7 +100,7 @@ define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x do
;
; FMA4-LABEL: test_2f64_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmadd:
@ -120,7 +120,7 @@ define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x do
;
; FMA4-LABEL: test_4f64_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fmadd:
@ -144,7 +144,7 @@ define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
;
; FMA4-LABEL: test_f32_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fmsub:
@ -164,7 +164,7 @@ define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float
;
; FMA4-LABEL: test_4f32_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmsub:
@ -184,7 +184,7 @@ define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float
;
; FMA4-LABEL: test_8f32_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fmsub:
@ -204,7 +204,7 @@ define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
;
; FMA4-LABEL: test_f64_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fmsub:
@ -224,7 +224,7 @@ define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x do
;
; FMA4-LABEL: test_2f64_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmsub:
@ -244,7 +244,7 @@ define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x do
;
; FMA4-LABEL: test_4f64_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fmsub:
@ -268,7 +268,7 @@ define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
;
; FMA4-LABEL: test_f32_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fnmadd:
@ -288,7 +288,7 @@ define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x floa
;
; FMA4-LABEL: test_4f32_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fnmadd:
@ -308,7 +308,7 @@ define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x floa
;
; FMA4-LABEL: test_8f32_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fnmadd:
@ -328,7 +328,7 @@ define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
;
; FMA4-LABEL: test_f64_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fnmadd:
@ -348,7 +348,7 @@ define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x d
;
; FMA4-LABEL: test_2f64_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fnmadd:
@ -368,7 +368,7 @@ define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x d
;
; FMA4-LABEL: test_4f64_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fnmadd:
@ -392,7 +392,7 @@ define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
;
; FMA4-LABEL: test_f32_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fnmsub:
@ -413,7 +413,7 @@ define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x floa
;
; FMA4-LABEL: test_4f32_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fnmsub:
@ -434,7 +434,7 @@ define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x floa
;
; FMA4-LABEL: test_8f32_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fnmsub:
@ -455,7 +455,7 @@ define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
;
; FMA4-LABEL: test_f64_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fnmsub:
@ -476,7 +476,7 @@ define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x d
;
; FMA4-LABEL: test_2f64_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fnmsub:
@ -497,7 +497,7 @@ define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x d
;
; FMA4-LABEL: test_4f64_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fnmsub:
@ -522,7 +522,7 @@ define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x
;
; FMA4-LABEL: test_4f32_fmadd_load:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmadd_load:
@ -543,7 +543,7 @@ define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <
;
; FMA4-LABEL: test_2f64_fmsub_load:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmsub_load:
@ -586,7 +586,7 @@ define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
@ -624,7 +624,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
@ -662,7 +662,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
@ -700,7 +700,7 @@ define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
@ -738,7 +738,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
@ -776,7 +776,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x fl
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
@ -817,7 +817,7 @@ define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
@ -858,7 +858,7 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
@ -899,7 +899,7 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
@ -940,7 +940,7 @@ define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
@ -981,7 +981,7 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
@ -1022,7 +1022,7 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x fl
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
@ -1060,7 +1060,7 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
@ -1098,7 +1098,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
@ -1136,7 +1136,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
@ -1174,7 +1174,7 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
@ -1212,7 +1212,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
@ -1250,7 +1250,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x fl
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
@ -1280,7 +1280,7 @@ define float @test_f32_interp(float %x, float %y, float %t) {
; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
; FMA4-INFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-INFS-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_f32_interp:
@ -1299,8 +1299,8 @@ define float @test_f32_interp(float %x, float %y, float %t) {
;
; FMA4-NOINFS-LABEL: test_f32_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubss %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NOINFS-NEXT: vfmsubss %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_f32_interp:
@ -1329,7 +1329,7 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f32_interp:
@ -1348,8 +1348,8 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
;
; FMA4-NOINFS-LABEL: test_v4f32_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f32_interp:
@ -1378,7 +1378,7 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f32_interp:
@ -1397,8 +1397,8 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
;
; FMA4-NOINFS-LABEL: test_v8f32_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %ymm1, %ymm1, %ymm2, %ymm1
; FMA4-NOINFS-NEXT: vfmsubps %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f32_interp:
@ -1427,7 +1427,7 @@ define double @test_f64_interp(double %x, double %y, double %t) {
; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
; FMA4-INFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-INFS-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_f64_interp:
@ -1446,8 +1446,8 @@ define double @test_f64_interp(double %x, double %y, double %t) {
;
; FMA4-NOINFS-LABEL: test_f64_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubsd %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NOINFS-NEXT: vfmsubsd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_f64_interp:
@ -1476,7 +1476,7 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v2f64_interp:
@ -1495,8 +1495,8 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
;
; FMA4-NOINFS-LABEL: test_v2f64_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubpd %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NOINFS-NEXT: vfmsubpd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v2f64_interp:
@ -1525,7 +1525,7 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v4f64_interp:
@ -1544,8 +1544,8 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
;
; FMA4-NOINFS-LABEL: test_v4f64_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubpd %ymm1, %ymm1, %ymm2, %ymm1
; FMA4-NOINFS-NEXT: vfmsubpd %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v4f64_interp:
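In the interp tests, under no-infs the linear interpolation x*t + y*(1 - t) contracts into two dependent fused subtracts, as the checks above show: tmp = (t * y) - y, then (x * t) - tmp, which expands back to x*t - t*y + y. A scalar sketch of the algebra:

#include <cmath>

// Reference: x*t + y*(1 - t).
double interp_ref(double x, double y, double t) {
  return x * t + y * (1.0 - t);
}

// Contracted form matching the FMA4-NOINFS codegen:
//   vfmsubsd: tmp = (t * y) - y
//   vfmsubsd: res = (x * t) - tmp  ==  x*t - t*y + y
double interp_fma(double x, double y, double t) {
  double tmp = std::fma(t, y, -y);
  return std::fma(x, t, -tmp);
}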
@ -1572,7 +1572,7 @@ define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x
;
; FMA4-LABEL: test_v4f32_fneg_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fmadd:
@ -1593,7 +1593,7 @@ define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <
;
; FMA4-LABEL: test_v4f64_fneg_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fmsub:
@ -1614,7 +1614,7 @@ define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4
;
; FMA4-LABEL: test_v4f32_fneg_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fnmadd:
@ -1636,7 +1636,7 @@ define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1,
;
; FMA4-LABEL: test_v4f64_fneg_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fnmsub:
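The fneg tests rely on negation distributing exactly over an FMA, so a negated variant can be replaced by its opposite-sign sibling: -(a*b + c) is -(a*b) - c (fnmsub), and -(-(a*b) + c) is (a*b) - c (fmsub). A scalar sketch of the two identities:

#include <cmath>

// -(fmadd(a, b, c)) == -(a * b) - c == fnmsub(a, b, c)
double neg_fmadd(double a, double b, double c) { return std::fma(-a, b, -c); }

// -(fnmadd(a, b, c)) == (a * b) - c == fmsub(a, b, c)
double neg_fnmadd(double a, double b, double c) { return std::fma(a, b, -c); }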
@ -1687,7 +1687,7 @@ define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y
;
; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
@ -1712,7 +1712,7 @@ define double @test_f64_fneg_fmul(double %x, double %y) #0 {
; FMA4-LABEL: test_f64_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fneg_fmul:
@ -1735,7 +1735,7 @@ define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
; FMA4-LABEL: test_v4f32_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fmul:
@ -1758,7 +1758,7 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
; FMA4-LABEL: test_v4f64_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fmul:
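For a negated plain multiply there is no dedicated instruction, so codegen zeroes a register and uses fnmsub, giving -(x*y) - 0. A scalar sketch; the subtract-zero form plausibly preserves the sign of a zero product (+0 * y negates to -0), though that rationale is my inference, not stated by the commit:

#include <cmath>

// -(x * y) lowered as fnmsub(x, y, 0.0):  -(x * y) - 0.0
double fneg_fmul(double x, double y) { return std::fma(-x, y, -0.0); }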


@ -21,8 +21,8 @@ define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x
;
; FMA4-LABEL: test_16f32_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmadd:
@ -43,8 +43,8 @@ define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x do
;
; FMA4-LABEL: test_8f64_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmadd:
@ -69,8 +69,8 @@ define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x
;
; FMA4-LABEL: test_16f32_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmsub:
@ -91,8 +91,8 @@ define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x do
;
; FMA4-LABEL: test_8f64_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmsub:
@ -117,8 +117,8 @@ define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x
;
; FMA4-LABEL: test_16f32_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fnmadd:
@ -139,8 +139,8 @@ define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x d
;
; FMA4-LABEL: test_8f64_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fnmadd:
@ -165,8 +165,8 @@ define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x
;
; FMA4-LABEL: test_16f32_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fnmsub:
@ -188,8 +188,8 @@ define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x d
;
; FMA4-LABEL: test_8f64_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fnmsub:
@ -215,8 +215,8 @@ define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1,
;
; FMA4-LABEL: test_16f32_fmadd_load:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT: vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_16f32_fmadd_load:
@ -238,8 +238,8 @@ define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <
;
; FMA4-LABEL: test_8f64_fmsub_load:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT: vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm2
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * mem) - ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f64_fmsub_load:
@ -289,8 +289,8 @@ define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
@ -335,8 +335,8 @@ define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
@ -381,8 +381,8 @@ define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
@ -427,8 +427,8 @@ define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double>
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
@ -474,8 +474,8 @@ define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
@ -521,8 +521,8 @@ define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
@ -568,8 +568,8 @@ define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
@ -615,8 +615,8 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double>
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
@ -661,8 +661,8 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
@ -707,8 +707,8 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
@ -753,8 +753,8 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
@ -799,8 +799,8 @@ define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double>
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
@ -835,8 +835,8 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulps %ymm7, %ymm2, %ymm2
; FMA4-INFS-NEXT: vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v16f32_interp:
@ -857,10 +857,10 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
;
; FMA4-NOINFS-LABEL: test_v16f32_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v16f32_interp:
@ -894,8 +894,8 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT: vmulpd %ymm7, %ymm2, %ymm2
; FMA4-INFS-NEXT: vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
; FMA4-INFS-NEXT: retq
;
; AVX512-INFS-LABEL: test_v8f64_interp:
@ -916,10 +916,10 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
;
; FMA4-NOINFS-LABEL: test_v8f64_interp:
; FMA4-NOINFS: # %bb.0:
; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
; FMA4-NOINFS-NEXT: retq
;
; AVX512-NOINFS-LABEL: test_v8f64_interp:
@ -947,8 +947,8 @@ define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1,
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
@ -970,8 +970,8 @@ define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
@ -993,8 +993,8 @@ define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1,
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
@ -1017,8 +1017,8 @@ define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1,
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
@ -1072,8 +1072,8 @@ define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4: # %bb.0:
; FMA4-NEXT: vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT: vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
@ -1099,8 +1099,8 @@ define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0
; FMA4-LABEL: test_v16f32_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorps %xmm4, %xmm4, %xmm4
; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v16f32_fneg_fmul:
@ -1124,8 +1124,8 @@ define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
; FMA4-LABEL: test_v8f64_fneg_fmul:
; FMA4: # %bb.0:
; FMA4-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul:
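No FMA4-capable CPU has 512-bit vectors, so each <16 x float> or <8 x double> operation in this file legalizes into a pair of 256-bit FMA4 instructions, one per ymm half, which is why the FMA4 checks come in twos. A sketch of the split as a plain loop:

#include <cmath>

// A 512-bit fmadd on FMA4 targets is just two 256-bit halves:
//   ymm0 = (ymm0 * ymm2) + ymm4   (lanes 0..7)
//   ymm1 = (ymm1 * ymm3) + ymm5   (lanes 8..15)
void fmadd_v16f32(const float *a, const float *b, const float *c, float *r) {
  for (int half = 0; half < 2; ++half)   // two ymm registers
    for (int i = 0; i < 8; ++i)          // eight f32 lanes each
      r[half * 8 + i] = std::fma(a[half * 8 + i], b[half * 8 + i],
                                 c[half * 8 + i]);
}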


@ -13,7 +13,7 @@ define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x do
;
; FMA4-LABEL: mul_addsub_pd128:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
entry:
%AB = fmul <2 x double> %A, %B
@ -31,7 +31,7 @@ define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float>
;
; FMA4-LABEL: mul_addsub_ps128:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
entry:
%AB = fmul <4 x float> %A, %B
@ -49,7 +49,7 @@ define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x dou
;
; FMA4-LABEL: mul_addsub_pd256:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
entry:
%AB = fmul <4 x double> %A, %B
@ -67,7 +67,7 @@ define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float>
;
; FMA4-LABEL: mul_addsub_ps256:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
entry:
%AB = fmul <8 x float> %A, %B
@ -91,8 +91,8 @@ define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x dou
;
; FMA4-LABEL: mul_addsub_pd512:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
entry:
%AB = fmul <8 x double> %A, %B
@ -116,8 +116,8 @@ define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x fl
;
; FMA4-LABEL: mul_addsub_ps512:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
entry:
%AB = fmul <16 x float> %A, %B
@ -135,7 +135,7 @@ define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D,
;
; FMA4-LABEL: buildvector_mul_addsub_ps128:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x float> %C, %D
@ -166,7 +166,7 @@ define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double>
;
; FMA4-LABEL: buildvector_mul_addsub_pd128:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <2 x double> %C, %D
@ -189,7 +189,7 @@ define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D,
;
; FMA4-LABEL: buildvector_mul_addsub_ps256:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <8 x float> %C, %D
@ -236,7 +236,7 @@ define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double>
;
; FMA4-LABEL: buildvector_mul_addsub_pd256:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x double> %C, %D
@ -273,8 +273,8 @@ define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float>
;
; FMA4-LABEL: buildvector_mul_addsub_ps512:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <16 x float> %C, %D
@ -359,8 +359,8 @@ define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double>
;
; FMA4-LABEL: buildvector_mul_addsub_pd512:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <8 x double> %C, %D
@ -404,7 +404,7 @@ define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D,
;
; FMA4-LABEL: buildvector_mul_subadd_ps128:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x float> %C, %D
@ -435,7 +435,7 @@ define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double>
;
; FMA4-LABEL: buildvector_mul_subadd_pd128:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <2 x double> %C, %D
@ -458,7 +458,7 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
;
; FMA4-LABEL: buildvector_mul_subadd_ps256:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <8 x float> %C, %D
@ -505,7 +505,7 @@ define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double>
;
; FMA4-LABEL: buildvector_mul_subadd_pd256:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x double> %C, %D
@ -542,8 +542,8 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
;
; FMA4-LABEL: buildvector_mul_subadd_ps512:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <16 x float> %C, %D
@ -628,8 +628,8 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
;
; FMA4-LABEL: buildvector_mul_subadd_pd512:
; FMA4: # %bb.0: # %bb
; FMA4-NEXT: vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <8 x double> %C, %D


@ -22,7 +22,7 @@ define float @f1(float %0, float %1, float %2) #0 {
;
; FMA4-LABEL: f1:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg float %0
@ -50,7 +50,7 @@ define double @f2(double %0, double %1, double %2) #0 {
;
; FMA4-LABEL: f2:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg double %0
@ -78,7 +78,7 @@ define float @f3(float %0, float %1, float %2) #0 {
;
; FMA4-LABEL: f3:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg float %2
@ -106,7 +106,7 @@ define double @f4(double %0, double %1, double %2) #0 {
;
; FMA4-LABEL: f4:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg double %2
@ -136,7 +136,7 @@ define float @f5(float %0, float %1, float %2) #0 {
;
; FMA4-LABEL: f5:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg float %0
@ -167,7 +167,7 @@ define double @f6(double %0, double %1, double %2) #0 {
;
; FMA4-LABEL: f6:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg double %0
@ -197,7 +197,7 @@ define float @f7(float %0, float %1, float %2) #0 {
;
; FMA4-LABEL: f7:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
@ -234,7 +234,7 @@ define double @f8(double %0, double %1, double %2) #0 {
;
; FMA4-LABEL: f8:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
@ -267,7 +267,7 @@ define float @f9(float %0, float %1, float %2) #0 {
;
; FMA4-LABEL: f9:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
@ -309,7 +309,7 @@ define double @f10(double %0, double %1, double %2) #0 {
;
; FMA4-LABEL: f10:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
@ -400,7 +400,7 @@ define float @f15() #0 {
; FMA4-LABEL: f15:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT: vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.fmuladd.f32(
@ -432,7 +432,7 @@ define double @f16() #0 {
; FMA4-LABEL: f16:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT: vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.fmuladd.f64(
@ -468,7 +468,7 @@ define float @f17() #0 {
; FMA4-LABEL: f17:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; FMA4-NEXT: vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.fma.f32(
@ -504,7 +504,7 @@ define double @f18() #0 {
; FMA4-LABEL: f18:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; FMA4-NEXT: vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
; FMA4-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.fma.f64(
@ -568,7 +568,7 @@ define <4 x float> @f19(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
;
; FMA4-LABEL: f19:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <4 x float> %0
@ -610,7 +610,7 @@ define <2 x double> @f20(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
;
; FMA4-LABEL: f20:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <2 x double> %0
@ -672,7 +672,7 @@ define <4 x float> @f21(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
;
; FMA4-LABEL: f21:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <4 x float> %2
@ -714,7 +714,7 @@ define <2 x double> @f22(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
;
; FMA4-LABEL: f22:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <2 x double> %2
@ -778,7 +778,7 @@ define <4 x float> @f23(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
;
; FMA4-LABEL: f23:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <4 x float> %0
@ -823,7 +823,7 @@ define <2 x double> @f24(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
;
; FMA4-LABEL: f24:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: retq
entry:
%3 = fneg <2 x double> %0
@ -887,7 +887,7 @@ define <4 x float> @f25(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
;
; FMA4-LABEL: f25:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
@ -938,7 +938,7 @@ define <2 x double> @f26(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
;
; FMA4-LABEL: f26:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
entry:
@ -1005,7 +1005,7 @@ define <4 x float> @f27(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
;
; FMA4-LABEL: f27:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
@ -1061,7 +1061,7 @@ define <2 x double> @f28(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
;
; FMA4-LABEL: f28:
; FMA4: # %bb.0: # %entry
; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
entry:


@ -14,8 +14,8 @@ define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c)
; CHECK-NEXT: .cfi_def_cfa_register %ebp
; CHECK-NEXT: andl $-32, %esp
; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: vfmaddps 8(%ebp), %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vfmaddps 40(%ebp), %ymm3, %ymm1, %ymm1
; CHECK-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + mem
; CHECK-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + mem
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl