forked from OSchip/llvm-project
[X86] X86InstComments - add FMA4 comments
These typically match the FMA3 equivalents, although the multiply operands sometimes get flipped due to the FMA3 permute variants.
This commit is contained in:
parent
10417ad2e4
commit
4aa7b9cc96
|
@ -199,6 +199,40 @@ using namespace llvm;
|
|||
CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
|
||||
CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
|
||||
|
||||
// Groups the CASE_AVX_INS_COMMON entries for the `4` and `4Y` variants of the
// FMA4 instruction `Inst` with operand-form suffix `suf`. Used as switch case
// labels in the FMA comment printer.
// NOTE(review): `4Y` is presumably the 256-bit (YMM) encoding - confirm
// against the instruction definitions.
#define CASE_FMA4(Inst, suf) \
|
||||
CASE_AVX_INS_COMMON(Inst, 4, suf) \
|
||||
CASE_AVX_INS_COMMON(Inst, 4Y, suf)
|
||||
|
||||
// Case labels for the packed (PD and PS) FMA4 forms of `Inst` with the `rr`
// suffix. The FMA comment printer treats these as the all-register form
// (it sets RegForm and reads the last operand as a register).
#define CASE_FMA4_PACKED_RR(Inst) \
|
||||
CASE_FMA4(Inst##PD, rr) \
|
||||
CASE_FMA4(Inst##PS, rr)
|
||||
|
||||
// Case labels for the packed (PD and PS) FMA4 forms of `Inst` with the `rm`
// suffix. The FMA comment printer reads operand 2 as a register for these and
// does not read a register accumulator.
// NOTE(review): presumably the final source is the memory operand - confirm.
#define CASE_FMA4_PACKED_RM(Inst) \
|
||||
CASE_FMA4(Inst##PD, rm) \
|
||||
CASE_FMA4(Inst##PS, rm)
|
||||
|
||||
// Case labels for the packed (PD and PS) FMA4 forms of `Inst` with the `mr`
// suffix. The FMA comment printer reads the last operand as the register
// accumulator for these and does not read operand 2 as a register.
// NOTE(review): presumably the second multiplicand is the memory operand -
// confirm.
#define CASE_FMA4_PACKED_MR(Inst) \
|
||||
CASE_FMA4(Inst##PD, mr) \
|
||||
CASE_FMA4(Inst##PS, mr)
|
||||
|
||||
// Case labels for the scalar (SD4 and SS4) FMA4 forms of `Inst` with the `rr`
// and `rr_Int` suffixes; the second CASE_AVX_INS_COMMON argument is left
// empty.
// NOTE(review): `_Int` presumably denotes the intrinsic variants - confirm.
#define CASE_FMA4_SCALAR_RR(Inst) \
|
||||
CASE_AVX_INS_COMMON(Inst##SD4, , rr) \
|
||||
CASE_AVX_INS_COMMON(Inst##SS4, , rr) \
|
||||
CASE_AVX_INS_COMMON(Inst##SD4, , rr_Int) \
|
||||
CASE_AVX_INS_COMMON(Inst##SS4, , rr_Int)
|
||||
|
||||
// Case labels for the scalar (SD4 and SS4) FMA4 forms of `Inst` with the `rm`
// and `rm_Int` suffixes; the second CASE_AVX_INS_COMMON argument is left
// empty.
// NOTE(review): `_Int` presumably denotes the intrinsic variants - confirm.
#define CASE_FMA4_SCALAR_RM(Inst) \
|
||||
CASE_AVX_INS_COMMON(Inst##SD4, , rm) \
|
||||
CASE_AVX_INS_COMMON(Inst##SS4, , rm) \
|
||||
CASE_AVX_INS_COMMON(Inst##SD4, , rm_Int) \
|
||||
CASE_AVX_INS_COMMON(Inst##SS4, , rm_Int)
|
||||
|
||||
// Case labels for the scalar (SD4 and SS4) FMA4 forms of `Inst` with the `mr`
// and `mr_Int` suffixes; the second CASE_AVX_INS_COMMON argument is left
// empty.
// NOTE(review): `_Int` presumably denotes the intrinsic variants - confirm.
#define CASE_FMA4_SCALAR_MR(Inst) \
|
||||
CASE_AVX_INS_COMMON(Inst##SD4, , mr) \
|
||||
CASE_AVX_INS_COMMON(Inst##SS4, , mr) \
|
||||
CASE_AVX_INS_COMMON(Inst##SD4, , mr_Int) \
|
||||
CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
|
||||
|
||||
static unsigned getVectorRegSize(unsigned RegNo) {
|
||||
if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
|
||||
return 512;
|
||||
|
@ -247,14 +281,14 @@ static void printMasking(raw_ostream &OS, const MCInst *MI,
|
|||
OS << " {z}";
|
||||
}
|
||||
|
||||
static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
|
||||
static bool printFMAComments(const MCInst *MI, raw_ostream &OS) {
|
||||
const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
|
||||
unsigned NumOperands = MI->getNumOperands();
|
||||
bool RegForm = false;
|
||||
bool Negate = false;
|
||||
StringRef AccStr = "+";
|
||||
|
||||
// The operands for FMA instructions without rounding fall into two forms.
|
||||
// The operands for FMA3 instructions without rounding fall into two forms:
|
||||
// dest, src1, src2, src3
|
||||
// dest, src1, mask, src2, src3
|
||||
// Where src3 is either a register or 5 memory address operands. So to find
|
||||
|
@ -262,9 +296,118 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
|
|||
// index from the end by taking into account memory vs register form when
|
||||
// finding src2.
|
||||
|
||||
// The operands for FMA4 instructions:
|
||||
// dest, src1, src2, src3
|
||||
// Where src2 OR src3 are either a register or 5 memory address operands. So
|
||||
// to find dest and src1 we can index from the front, src2 (reg/mem) follows
|
||||
// and then src3 (reg/mem) will be at the end.
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
|
||||
CASE_FMA4_PACKED_RR(FMADD)
|
||||
CASE_FMA4_SCALAR_RR(FMADD)
|
||||
RegForm = true;
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
LLVM_FALLTHROUGH;
|
||||
CASE_FMA4_PACKED_RM(FMADD)
|
||||
CASE_FMA4_SCALAR_RM(FMADD)
|
||||
Mul2Name = getRegName(MI->getOperand(2).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
break;
|
||||
CASE_FMA4_PACKED_MR(FMADD)
|
||||
CASE_FMA4_SCALAR_MR(FMADD)
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
break;
|
||||
|
||||
CASE_FMA4_PACKED_RR(FMSUB)
|
||||
CASE_FMA4_SCALAR_RR(FMSUB)
|
||||
RegForm = true;
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
LLVM_FALLTHROUGH;
|
||||
CASE_FMA4_PACKED_RM(FMSUB)
|
||||
CASE_FMA4_SCALAR_RM(FMSUB)
|
||||
Mul2Name = getRegName(MI->getOperand(2).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
AccStr = "-";
|
||||
break;
|
||||
CASE_FMA4_PACKED_MR(FMSUB)
|
||||
CASE_FMA4_SCALAR_MR(FMSUB)
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
AccStr = "-";
|
||||
break;
|
||||
|
||||
CASE_FMA4_PACKED_RR(FNMADD)
|
||||
CASE_FMA4_SCALAR_RR(FNMADD)
|
||||
RegForm = true;
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
LLVM_FALLTHROUGH;
|
||||
CASE_FMA4_PACKED_RM(FNMADD)
|
||||
CASE_FMA4_SCALAR_RM(FNMADD)
|
||||
Mul2Name = getRegName(MI->getOperand(2).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
Negate = true;
|
||||
break;
|
||||
CASE_FMA4_PACKED_MR(FNMADD)
|
||||
CASE_FMA4_SCALAR_MR(FNMADD)
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
Negate = true;
|
||||
break;
|
||||
|
||||
CASE_FMA4_PACKED_RR(FNMSUB)
|
||||
CASE_FMA4_SCALAR_RR(FNMSUB)
|
||||
RegForm = true;
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
LLVM_FALLTHROUGH;
|
||||
CASE_FMA4_PACKED_RM(FNMSUB)
|
||||
CASE_FMA4_SCALAR_RM(FNMSUB)
|
||||
Mul2Name = getRegName(MI->getOperand(2).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
AccStr = "-";
|
||||
Negate = true;
|
||||
break;
|
||||
CASE_FMA4_PACKED_MR(FNMSUB)
|
||||
CASE_FMA4_SCALAR_MR(FNMSUB)
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
AccStr = "-";
|
||||
Negate = true;
|
||||
break;
|
||||
|
||||
CASE_FMA4_PACKED_RR(FMADDSUB)
|
||||
RegForm = true;
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
LLVM_FALLTHROUGH;
|
||||
CASE_FMA4_PACKED_RM(FMADDSUB)
|
||||
Mul2Name = getRegName(MI->getOperand(2).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
AccStr = "+/-";
|
||||
break;
|
||||
CASE_FMA4_PACKED_MR(FMADDSUB)
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
AccStr = "+/-";
|
||||
break;
|
||||
|
||||
CASE_FMA4_PACKED_RR(FMSUBADD)
|
||||
RegForm = true;
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
LLVM_FALLTHROUGH;
|
||||
CASE_FMA4_PACKED_RM(FMSUBADD)
|
||||
Mul2Name = getRegName(MI->getOperand(2).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
AccStr = "-/+";
|
||||
break;
|
||||
CASE_FMA4_PACKED_MR(FMSUBADD)
|
||||
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
Mul1Name = getRegName(MI->getOperand(1).getReg());
|
||||
AccStr = "-/+";
|
||||
break;
|
||||
|
||||
CASE_FMA_PACKED_REG(FMADD132)
|
||||
CASE_FMA_SCALAR_REG(FMADD132)
|
||||
Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
|
||||
|
@ -504,7 +647,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
|||
unsigned NumOperands = MI->getNumOperands();
|
||||
bool RegForm = false;
|
||||
|
||||
if (printFMA3Comments(MI, OS))
|
||||
if (printFMAComments(MI, OS))
|
||||
return true;
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
|
||||
; CHECK-LABEL: fmafunc:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retl
|
||||
;
|
||||
; CHECK-NOFMA-LABEL: fmafunc:
|
||||
|
|
|
@ -12,7 +12,7 @@ define float @test_fneg_fma_subx_y_negz_f32(float %w, float %x, float %y, float
|
|||
; FMA4-LABEL: test_fneg_fma_subx_y_negz_f32:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddss %xmm3, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm3
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%subx = fsub nsz float %w, %x
|
||||
|
@ -32,7 +32,7 @@ define float @test_fneg_fma_x_suby_negz_f32(float %w, float %x, float %y, float
|
|||
; FMA4-LABEL: test_fneg_fma_x_suby_negz_f32:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%suby = fsub nsz float %w, %y
|
||||
|
@ -54,7 +54,7 @@ define float @test_fneg_fma_subx_suby_negz_f32(float %w, float %x, float %y, flo
|
|||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm1
|
||||
; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
|
||||
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%subx = fsub nsz float %w, %x
|
||||
|
@ -75,7 +75,7 @@ define float @test_fneg_fma_subx_negy_negz_f32(float %w, float %x, float %y, flo
|
|||
; FMA4-LABEL: test_fneg_fma_subx_negy_negz_f32:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss %xmm3, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm3
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%subx = fsub nsz float %w, %x
|
||||
|
@ -96,7 +96,7 @@ define <4 x float> @test_fma_rcp_fneg_v4f32(<4 x float> %x, <4 x float> %y, <4 x
|
|||
; FMA4-LABEL: test_fma_rcp_fneg_v4f32:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vrcpps %xmm2, %xmm2
|
||||
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%0 = fneg <4 x float> %z
|
||||
|
@ -118,7 +118,7 @@ define float @negated_constant(float %x) {
|
|||
; FMA4-LABEL: negated_constant:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
|
||||
; FMA4-NEXT: vfnmsubss %xmm1, {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1
|
||||
; FMA4-NEXT: retq
|
||||
%m = fmul float %x, 42.0
|
||||
%fma = call nsz float @llvm.fma.f32(float %x, float -42.0, float %m)
|
||||
|
|
|
@ -8,7 +8,7 @@ define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_baa_ss:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -18,7 +18,7 @@ define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_aba_ss:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; FMA4-NEXT: vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -28,7 +28,7 @@ define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_bba_ss:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; FMA4-NEXT: vfmaddss (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -39,7 +39,7 @@ define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_baa_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -49,7 +49,7 @@ define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_aba_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -59,7 +59,7 @@ define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_bba_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rdx), %xmm0
|
||||
; FMA4-NEXT: vfmaddps (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -70,7 +70,7 @@ define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -80,7 +80,7 @@ define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -90,7 +90,7 @@ define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rdx), %ymm0
|
||||
; FMA4-NEXT: vfmaddps (%rcx), %ymm0, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -101,7 +101,7 @@ define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmadd_baa_sd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -111,7 +111,7 @@ define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmadd_aba_sd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; FMA4-NEXT: vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -121,7 +121,7 @@ define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmadd_bba_sd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; FMA4-NEXT: vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -132,7 +132,7 @@ define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmadd_baa_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -142,7 +142,7 @@ define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmadd_aba_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -152,7 +152,7 @@ define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmadd_bba_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rdx), %xmm0
|
||||
; FMA4-NEXT: vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -163,7 +163,7 @@ define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #
|
|||
; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -173,7 +173,7 @@ define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #
|
|||
; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -183,7 +183,7 @@ define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #
|
|||
; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rdx), %ymm0
|
||||
; FMA4-NEXT: vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -194,7 +194,7 @@ define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fnmadd_baa_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -204,7 +204,7 @@ define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fnmadd_aba_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -214,7 +214,7 @@ define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fnmadd_bba_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rdx), %xmm0
|
||||
; FMA4-NEXT: vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -225,7 +225,7 @@ define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -235,7 +235,7 @@ define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -245,7 +245,7 @@ define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rdx), %ymm0
|
||||
; FMA4-NEXT: vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -256,7 +256,7 @@ define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmadd_baa_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -266,7 +266,7 @@ define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmadd_aba_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -276,7 +276,7 @@ define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmadd_bba_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rdx), %xmm0
|
||||
; FMA4-NEXT: vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -287,7 +287,7 @@ define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b)
|
|||
; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -297,7 +297,7 @@ define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b)
|
|||
; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -307,7 +307,7 @@ define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b)
|
|||
; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rdx), %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -318,7 +318,7 @@ define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmsub_baa_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -328,7 +328,7 @@ define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmsub_aba_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -338,7 +338,7 @@ define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmsub_bba_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rdx), %xmm0
|
||||
; FMA4-NEXT: vfmsubps (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -349,7 +349,7 @@ define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -359,7 +359,7 @@ define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -369,7 +369,7 @@ define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rdx), %ymm0
|
||||
; FMA4-NEXT: vfmsubps (%rcx), %ymm0, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -380,7 +380,7 @@ define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmsub_baa_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -390,7 +390,7 @@ define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmsub_aba_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -400,7 +400,7 @@ define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fmsub_bba_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rdx), %xmm0
|
||||
; FMA4-NEXT: vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -411,7 +411,7 @@ define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #
|
|||
; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -421,7 +421,7 @@ define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #
|
|||
; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -431,7 +431,7 @@ define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #
|
|||
; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rdx), %ymm0
|
||||
; FMA4-NEXT: vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -442,7 +442,7 @@ define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fnmsub_baa_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -452,7 +452,7 @@ define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fnmsub_aba_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -462,7 +462,7 @@ define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
|
|||
; FMA4-LABEL: test_x86_fnmsub_bba_ps:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rdx), %xmm0
|
||||
; FMA4-NEXT: vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
|
@ -473,7 +473,7 @@ define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -483,7 +483,7 @@ define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -493,7 +493,7 @@ define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovaps (%rdx), %ymm0
|
||||
; FMA4-NEXT: vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
|
||||
ret <8 x float> %res
|
||||
|
@ -504,7 +504,7 @@ define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmsub_baa_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -514,7 +514,7 @@ define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmsub_aba_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -524,7 +524,7 @@ define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0
|
|||
; FMA4-LABEL: test_x86_fnmsub_bba_pd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rdx), %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
|
@ -535,7 +535,7 @@ define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b)
|
|||
; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -545,7 +545,7 @@ define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b)
|
|||
; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rcx), %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
@ -555,7 +555,7 @@ define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b)
|
|||
; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vmovapd (%rdx), %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
|
||||
; FMA4-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
|
||||
ret <4 x double> %res
|
||||
|
|
|
@ -9,7 +9,7 @@ declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b,
|
|||
define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
|
||||
|
@ -20,7 +20,7 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
|||
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq
|
||||
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
|
||||
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i)
|
||||
|
@ -30,7 +30,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
|||
define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq
|
||||
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
|
||||
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c)
|
||||
|
@ -40,7 +40,7 @@ define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
|||
define <4 x float> @test4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq
|
||||
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
|
||||
%res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c)
|
||||
|
@ -50,7 +50,7 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
|||
define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq
|
||||
%sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
|
||||
%sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
|
||||
|
@ -61,7 +61,7 @@ define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
|||
define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
|
||||
|
@ -72,7 +72,7 @@ define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
|||
define <2 x double> @test7(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq
|
||||
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
|
||||
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i)
|
||||
|
@ -82,7 +82,7 @@ define <2 x double> @test7(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
|||
define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: test8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq
|
||||
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
|
||||
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %sub.i, <2 x double> %c)
|
||||
|
@ -92,7 +92,7 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
|||
define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: test9:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq
|
||||
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
|
||||
%res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c)
|
||||
|
@ -102,7 +102,7 @@ define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
|||
define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||
; CHECK-LABEL: test10:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq
|
||||
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
|
||||
%sub.i.2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
|
||||
|
|
|
@ -6,6 +6,7 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
|
@ -16,6 +17,7 @@ define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
|
@ -26,6 +28,7 @@ define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
|
@ -36,6 +39,7 @@ define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
|
@ -47,6 +51,7 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
|
@ -57,6 +62,7 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
|
@ -67,6 +73,7 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
|
@ -77,6 +84,7 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %
|
|||
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
|
@ -88,6 +96,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
|
@ -98,6 +107,7 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
|
@ -108,6 +118,7 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1
|
|||
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
|
@ -118,6 +129,7 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double>
|
|||
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
|
@ -129,6 +141,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
|
@ -139,6 +152,7 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
|
@ -149,6 +163,7 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1
|
|||
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
|
@ -159,6 +174,7 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double>
|
|||
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
|
@ -170,6 +186,7 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
|
@ -180,6 +197,7 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a
|
|||
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
|
@ -190,6 +208,7 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %
|
|||
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
|
@ -200,6 +219,7 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double
|
|||
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
|
@ -211,6 +231,7 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
|
@ -221,6 +242,7 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a
|
|||
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
|
@ -231,6 +253,7 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %
|
|||
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %res
|
||||
|
@ -241,6 +264,7 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double
|
|||
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %res
|
||||
|
|
|
@ -7,6 +7,7 @@ define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
|
@ -16,6 +17,7 @@ define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1
|
|||
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
|
||||
; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
|
||||
ret <4 x float> %res
|
||||
|
@ -26,6 +28,7 @@ define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
|
@ -35,6 +38,7 @@ define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double>
|
|||
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
|
||||
; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
|
||||
ret <2 x double> %res
|
||||
|
@ -45,6 +49,7 @@ define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
ret <4 x float> %1
|
||||
|
@ -54,6 +59,7 @@ define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
ret <2 x double> %1
|
||||
|
@ -63,6 +69,7 @@ define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
ret <8 x float> %1
|
||||
|
@ -72,6 +79,7 @@ define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
ret <4 x double> %1
|
||||
|
@ -82,6 +90,7 @@ define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
|
||||
|
@ -92,6 +101,7 @@ define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
|
||||
|
@ -102,6 +112,7 @@ define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
|
||||
|
@ -112,6 +123,7 @@ define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %
|
|||
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) - ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
|
||||
|
@ -123,6 +135,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
|
||||
|
@ -133,6 +146,7 @@ define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
|
||||
|
@ -143,6 +157,7 @@ define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1
|
|||
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
|
||||
|
@ -153,6 +168,7 @@ define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double>
|
|||
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
|
||||
|
@ -164,6 +180,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
|
@ -175,6 +192,7 @@ define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
|
@ -186,6 +204,7 @@ define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1
|
|||
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
|
@ -197,6 +216,7 @@ define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double>
|
|||
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
|
@ -209,6 +229,7 @@ define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
|
@ -221,6 +242,7 @@ define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a
|
|||
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
|
@ -233,6 +255,7 @@ define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %
|
|||
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
|
@ -245,6 +268,7 @@ define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double
|
|||
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
|
@ -258,6 +282,7 @@ define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1,
|
|||
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
|
||||
%2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
|
@ -270,6 +295,7 @@ define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a
|
|||
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) -/+ xmm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
|
||||
%2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
|
@ -282,6 +308,7 @@ define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %
|
|||
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
|
||||
%2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
|
||||
|
@ -294,6 +321,7 @@ define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double
|
|||
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
|
||||
; CHECK-NEXT: # ymm0 = (ymm0 * ymm1) -/+ ymm2
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
|
||||
%2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
|
||||
|
|
|
@ -7,6 +7,7 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x floa
|
|||
; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0x07,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load float , float *%a2
|
||||
%y = insertelement <4 x float> undef, float %x, i32 0
|
||||
|
@ -17,6 +18,7 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a
|
|||
; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x6a,0x07,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load float , float *%a1
|
||||
%y = insertelement <4 x float> undef, float %x, i32 0
|
||||
|
@ -30,6 +32,7 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x do
|
|||
; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0x07,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load double , double *%a2
|
||||
%y = insertelement <2 x double> undef, double %x, i32 0
|
||||
|
@ -40,6 +43,7 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double*
|
|||
; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x6b,0x07,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load double , double *%a1
|
||||
%y = insertelement <2 x double> undef, double %x, i32 0
|
||||
|
@ -51,6 +55,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_ps_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0x07,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load <4 x float>, <4 x float>* %a2
|
||||
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x)
|
||||
|
@ -60,6 +65,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x floa
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_ps_load2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x68,0x07,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load <4 x float>, <4 x float>* %a1
|
||||
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2)
|
||||
|
@ -73,6 +79,7 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x flo
|
|||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovaps (%rdi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0f]
|
||||
; CHECK-NEXT: vfmaddps %xmm0, (%rsi), %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x68,0x06,0x00]
|
||||
; CHECK-NEXT: # xmm0 = (xmm1 * mem) + xmm0
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load <4 x float>, <4 x float>* %a0
|
||||
%y = load <4 x float>, <4 x float>* %a1
|
||||
|
@ -84,6 +91,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x dou
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_pd_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0x07,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * xmm1) + mem
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load <2 x double>, <2 x double>* %a2
|
||||
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x)
|
||||
|
@ -93,6 +101,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x do
|
|||
; CHECK-LABEL: test_x86_fma_vfmadd_pd_load2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x69,0x07,0x10]
|
||||
; CHECK-NEXT: # xmm0 = (xmm0 * mem) + xmm1
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load <2 x double>, <2 x double>* %a1
|
||||
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2)
|
||||
|
@ -106,6 +115,7 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x d
|
|||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovapd (%rdi), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0f]
|
||||
; CHECK-NEXT: vfmaddpd %xmm0, (%rsi), %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x69,0x06,0x00]
|
||||
; CHECK-NEXT: # xmm0 = (xmm1 * mem) + xmm0
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%x = load <2 x double>, <2 x double>* %a0
|
||||
%y = load <2 x double>, <2 x double>* %a1
|
||||
|
|
|
@ -10,7 +10,7 @@ define void @fmadd_aab_ss(float* %a, float* %b) {
|
|||
; CHECK-LABEL: fmadd_aab_ss:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vfmaddss (%rsi), %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
|
||||
; CHECK-NEXT: vmovss %xmm0, (%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%a.val = load float, float* %a
|
||||
|
@ -36,7 +36,7 @@ define void @fmadd_aba_ss(float* %a, float* %b) {
|
|||
; CHECK-LABEL: fmadd_aba_ss:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vfmaddss %xmm0, (%rsi), %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; CHECK-NEXT: vmovss %xmm0, (%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%a.val = load float, float* %a
|
||||
|
@ -62,7 +62,7 @@ define void @fmadd_aab_sd(double* %a, double* %b) {
|
|||
; CHECK-LABEL: fmadd_aab_sd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
|
||||
; CHECK-NEXT: vmovsd %xmm0, (%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%a.val = load double, double* %a
|
||||
|
@ -84,7 +84,7 @@ define void @fmadd_aba_sd(double* %a, double* %b) {
|
|||
; CHECK-LABEL: fmadd_aba_sd:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0
|
||||
; CHECK-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
|
||||
; CHECK-NEXT: vmovsd %xmm0, (%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%a.val = load double, double* %a
|
||||
|
|
|
@ -20,7 +20,7 @@ define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
|
|||
;
|
||||
; FMA4-LABEL: test_f32_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f32_fmadd:
|
||||
|
@ -40,7 +40,7 @@ define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float
|
|||
;
|
||||
; FMA4-LABEL: test_4f32_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f32_fmadd:
|
||||
|
@ -60,7 +60,7 @@ define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float
|
|||
;
|
||||
; FMA4-LABEL: test_8f32_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f32_fmadd:
|
||||
|
@ -80,7 +80,7 @@ define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
|
|||
;
|
||||
; FMA4-LABEL: test_f64_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f64_fmadd:
|
||||
|
@ -100,7 +100,7 @@ define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x do
|
|||
;
|
||||
; FMA4-LABEL: test_2f64_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_2f64_fmadd:
|
||||
|
@ -120,7 +120,7 @@ define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x do
|
|||
;
|
||||
; FMA4-LABEL: test_4f64_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f64_fmadd:
|
||||
|
@ -144,7 +144,7 @@ define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
|
|||
;
|
||||
; FMA4-LABEL: test_f32_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f32_fmsub:
|
||||
|
@ -164,7 +164,7 @@ define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float
|
|||
;
|
||||
; FMA4-LABEL: test_4f32_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f32_fmsub:
|
||||
|
@ -184,7 +184,7 @@ define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float
|
|||
;
|
||||
; FMA4-LABEL: test_8f32_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f32_fmsub:
|
||||
|
@ -204,7 +204,7 @@ define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
|
|||
;
|
||||
; FMA4-LABEL: test_f64_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f64_fmsub:
|
||||
|
@ -224,7 +224,7 @@ define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x do
|
|||
;
|
||||
; FMA4-LABEL: test_2f64_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_2f64_fmsub:
|
||||
|
@ -244,7 +244,7 @@ define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x do
|
|||
;
|
||||
; FMA4-LABEL: test_4f64_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f64_fmsub:
|
||||
|
@ -268,7 +268,7 @@ define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
|
|||
;
|
||||
; FMA4-LABEL: test_f32_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f32_fnmadd:
|
||||
|
@ -288,7 +288,7 @@ define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x floa
|
|||
;
|
||||
; FMA4-LABEL: test_4f32_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f32_fnmadd:
|
||||
|
@ -308,7 +308,7 @@ define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x floa
|
|||
;
|
||||
; FMA4-LABEL: test_8f32_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f32_fnmadd:
|
||||
|
@ -328,7 +328,7 @@ define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
|
|||
;
|
||||
; FMA4-LABEL: test_f64_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f64_fnmadd:
|
||||
|
@ -348,7 +348,7 @@ define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x d
|
|||
;
|
||||
; FMA4-LABEL: test_2f64_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_2f64_fnmadd:
|
||||
|
@ -368,7 +368,7 @@ define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x d
|
|||
;
|
||||
; FMA4-LABEL: test_4f64_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f64_fnmadd:
|
||||
|
@ -392,7 +392,7 @@ define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
|
|||
;
|
||||
; FMA4-LABEL: test_f32_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f32_fnmsub:
|
||||
|
@ -413,7 +413,7 @@ define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x floa
|
|||
;
|
||||
; FMA4-LABEL: test_4f32_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f32_fnmsub:
|
||||
|
@ -434,7 +434,7 @@ define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x floa
|
|||
;
|
||||
; FMA4-LABEL: test_8f32_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f32_fnmsub:
|
||||
|
@ -455,7 +455,7 @@ define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
|
|||
;
|
||||
; FMA4-LABEL: test_f64_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f64_fnmsub:
|
||||
|
@ -476,7 +476,7 @@ define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x d
|
|||
;
|
||||
; FMA4-LABEL: test_2f64_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_2f64_fnmsub:
|
||||
|
@ -497,7 +497,7 @@ define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x d
|
|||
;
|
||||
; FMA4-LABEL: test_4f64_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f64_fnmsub:
|
||||
|
@ -522,7 +522,7 @@ define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x
|
|||
;
|
||||
; FMA4-LABEL: test_4f32_fmadd_load:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_4f32_fmadd_load:
|
||||
|
@ -543,7 +543,7 @@ define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <
|
|||
;
|
||||
; FMA4-LABEL: test_2f64_fmsub_load:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_2f64_fmsub_load:
|
||||
|
@ -586,7 +586,7 @@ define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
|
||||
|
@ -624,7 +624,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
|
||||
|
@ -662,7 +662,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
|
||||
|
@ -700,7 +700,7 @@ define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
|
||||
|
@ -738,7 +738,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
|
||||
|
@ -776,7 +776,7 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x fl
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
|
||||
|
@ -817,7 +817,7 @@ define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
|
||||
|
@ -858,7 +858,7 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
|
||||
|
@ -899,7 +899,7 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
|
||||
|
@ -940,7 +940,7 @@ define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
|
||||
|
@ -981,7 +981,7 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
|
||||
|
@ -1022,7 +1022,7 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x fl
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
|
||||
|
@ -1060,7 +1060,7 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
|
||||
|
@ -1098,7 +1098,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
|
||||
|
@ -1136,7 +1136,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
|
||||
|
@ -1174,7 +1174,7 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
|
||||
|
@ -1212,7 +1212,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
|
||||
|
@ -1250,7 +1250,7 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x fl
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
|
||||
|
@ -1280,7 +1280,7 @@ define float @test_f32_interp(float %x, float %y, float %t) {
|
|||
; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
|
||||
; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
|
||||
; FMA4-INFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_f32_interp:
|
||||
|
@ -1299,8 +1299,8 @@ define float @test_f32_interp(float %x, float %y, float %t) {
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_f32_interp:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubss %xmm1, %xmm1, %xmm2, %xmm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubss %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_f32_interp:
|
||||
|
@ -1329,7 +1329,7 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
|
|||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
|
||||
; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
|
||||
; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v4f32_interp:
|
||||
|
@ -1348,8 +1348,8 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f32_interp:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm2, %xmm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f32_interp:
|
||||
|
@ -1378,7 +1378,7 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
|
|||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
|
||||
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v8f32_interp:
|
||||
|
@ -1397,8 +1397,8 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v8f32_interp:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm1, %ymm1, %ymm2, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v8f32_interp:
|
||||
|
@ -1427,7 +1427,7 @@ define double @test_f64_interp(double %x, double %y, double %t) {
|
|||
; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
||||
; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
|
||||
; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
|
||||
; FMA4-INFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_f64_interp:
|
||||
|
@ -1446,8 +1446,8 @@ define double @test_f64_interp(double %x, double %y, double %t) {
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_f64_interp:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubsd %xmm1, %xmm1, %xmm2, %xmm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubsd %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_f64_interp:
|
||||
|
@ -1476,7 +1476,7 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
|
|||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
|
||||
; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
||||
; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
||||
; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v2f64_interp:
|
||||
|
@ -1495,8 +1495,8 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v2f64_interp:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %xmm1, %xmm1, %xmm2, %xmm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v2f64_interp:
|
||||
|
@ -1525,7 +1525,7 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
|
|||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
|
||||
; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v4f64_interp:
|
||||
|
@ -1544,8 +1544,8 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v4f64_interp:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm1, %ymm1, %ymm2, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v4f64_interp:
|
||||
|
@ -1572,7 +1572,7 @@ define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x
|
|||
;
|
||||
; FMA4-LABEL: test_v4f32_fneg_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_fneg_fmadd:
|
||||
|
@ -1593,7 +1593,7 @@ define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <
|
|||
;
|
||||
; FMA4-LABEL: test_v4f64_fneg_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f64_fneg_fmsub:
|
||||
|
@ -1614,7 +1614,7 @@ define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4
|
|||
;
|
||||
; FMA4-LABEL: test_v4f32_fneg_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_fneg_fnmadd:
|
||||
|
@ -1636,7 +1636,7 @@ define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1,
|
|||
;
|
||||
; FMA4-LABEL: test_v4f64_fneg_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f64_fneg_fnmsub:
|
||||
|
@ -1687,7 +1687,7 @@ define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y
|
|||
;
|
||||
; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
|
||||
|
@ -1712,7 +1712,7 @@ define double @test_f64_fneg_fmul(double %x, double %y) #0 {
|
|||
; FMA4-LABEL: test_f64_fneg_fmul:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f64_fneg_fmul:
|
||||
|
@ -1735,7 +1735,7 @@ define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
|
|||
; FMA4-LABEL: test_v4f32_fneg_fmul:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_fneg_fmul:
|
||||
|
@ -1758,7 +1758,7 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
|
|||
; FMA4-LABEL: test_v4f64_fneg_fmul:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f64_fneg_fmul:
|
||||
|
|
|
@ -21,8 +21,8 @@ define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x
|
|||
;
|
||||
; FMA4-LABEL: test_16f32_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_16f32_fmadd:
|
||||
|
@ -43,8 +43,8 @@ define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x do
|
|||
;
|
||||
; FMA4-LABEL: test_8f64_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f64_fmadd:
|
||||
|
@ -69,8 +69,8 @@ define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x
|
|||
;
|
||||
; FMA4-LABEL: test_16f32_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_16f32_fmsub:
|
||||
|
@ -91,8 +91,8 @@ define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x do
|
|||
;
|
||||
; FMA4-LABEL: test_8f64_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f64_fmsub:
|
||||
|
@ -117,8 +117,8 @@ define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x
|
|||
;
|
||||
; FMA4-LABEL: test_16f32_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_16f32_fnmadd:
|
||||
|
@ -139,8 +139,8 @@ define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x d
|
|||
;
|
||||
; FMA4-LABEL: test_8f64_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f64_fnmadd:
|
||||
|
@ -165,8 +165,8 @@ define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x
|
|||
;
|
||||
; FMA4-LABEL: test_16f32_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_16f32_fnmsub:
|
||||
|
@ -188,8 +188,8 @@ define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x d
|
|||
;
|
||||
; FMA4-LABEL: test_8f64_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f64_fnmsub:
|
||||
|
@ -215,8 +215,8 @@ define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1,
|
|||
;
|
||||
; FMA4-LABEL: test_16f32_fmadd_load:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_16f32_fmadd_load:
|
||||
|
@ -238,8 +238,8 @@ define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <
|
|||
;
|
||||
; FMA4-LABEL: test_8f64_fmsub_load:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm2
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * mem) - ymm3
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_8f64_fmsub_load:
|
||||
|
@ -289,8 +289,8 @@ define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
|
||||
|
@ -335,8 +335,8 @@ define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
|
||||
|
@ -381,8 +381,8 @@ define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
|
||||
|
@ -427,8 +427,8 @@ define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double>
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
|
||||
|
@ -474,8 +474,8 @@ define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
|
||||
; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
|
||||
|
@ -521,8 +521,8 @@ define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
|
||||
; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
|
||||
|
@ -568,8 +568,8 @@ define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
|
||||
|
@ -615,8 +615,8 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double>
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
|
||||
|
@ -661,8 +661,8 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||
|
@ -707,8 +707,8 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||
|
@ -753,8 +753,8 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||
|
@ -799,8 +799,8 @@ define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double>
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||
|
@ -835,8 +835,8 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
|
|||
; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
|
||||
; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
|
||||
; FMA4-INFS-NEXT: vmulps %ymm7, %ymm2, %ymm2
|
||||
; FMA4-INFS-NEXT: vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
|
||||
; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v16f32_interp:
|
||||
|
@ -857,10 +857,10 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v16f32_interp:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm3, %ymm5, %ymm3
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm2, %ymm4, %ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm2, %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubps %ymm3, %ymm5, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v16f32_interp:
|
||||
|
@ -894,8 +894,8 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
|
|||
; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
|
||||
; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
|
||||
; FMA4-INFS-NEXT: vmulpd %ymm7, %ymm2, %ymm2
|
||||
; FMA4-INFS-NEXT: vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
|
||||
; FMA4-INFS-NEXT: vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
|
||||
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
|
||||
; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
|
||||
; FMA4-INFS-NEXT: retq
|
||||
;
|
||||
; AVX512-INFS-LABEL: test_v8f64_interp:
|
||||
|
@ -916,10 +916,10 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
|
|||
;
|
||||
; FMA4-NOINFS-LABEL: test_v8f64_interp:
|
||||
; FMA4-NOINFS: # %bb.0:
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm3, %ymm5, %ymm3
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm2, %ymm4, %ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm2, %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd %ymm3, %ymm5, %ymm1, %ymm1
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
|
||||
; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
|
||||
; FMA4-NOINFS-NEXT: retq
|
||||
;
|
||||
; AVX512-NOINFS-LABEL: test_v8f64_interp:
|
||||
|
@ -947,8 +947,8 @@ define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1,
|
|||
;
|
||||
; FMA4-LABEL: test_v16f32_fneg_fmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_fneg_fmadd:
|
||||
|
@ -970,8 +970,8 @@ define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <
|
|||
;
|
||||
; FMA4-LABEL: test_v8f64_fneg_fmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_fneg_fmsub:
|
||||
|
@ -993,8 +993,8 @@ define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1,
|
|||
;
|
||||
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
|
||||
|
@ -1017,8 +1017,8 @@ define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1,
|
|||
;
|
||||
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
|
||||
|
@ -1072,8 +1072,8 @@ define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float
|
|||
;
|
||||
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
|
||||
|
@ -1099,8 +1099,8 @@ define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0
|
|||
; FMA4-LABEL: test_v16f32_fneg_fmul:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||
; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_fneg_fmul:
|
||||
|
@ -1124,8 +1124,8 @@ define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
|
|||
; FMA4-LABEL: test_v8f64_fneg_fmul:
|
||||
; FMA4: # %bb.0:
|
||||
; FMA4-NEXT: vxorpd %xmm4, %xmm4, %xmm4
|
||||
; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_fneg_fmul:
|
||||
|
|
|
@ -13,7 +13,7 @@ define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x do
|
|||
;
|
||||
; FMA4-LABEL: mul_addsub_pd128:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%AB = fmul <2 x double> %A, %B
|
||||
|
@ -31,7 +31,7 @@ define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float>
|
|||
;
|
||||
; FMA4-LABEL: mul_addsub_ps128:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%AB = fmul <4 x float> %A, %B
|
||||
|
@ -49,7 +49,7 @@ define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x dou
|
|||
;
|
||||
; FMA4-LABEL: mul_addsub_pd256:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%AB = fmul <4 x double> %A, %B
|
||||
|
@ -67,7 +67,7 @@ define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float>
|
|||
;
|
||||
; FMA4-LABEL: mul_addsub_ps256:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%AB = fmul <8 x float> %A, %B
|
||||
|
@ -91,8 +91,8 @@ define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x dou
|
|||
;
|
||||
; FMA4-LABEL: mul_addsub_pd512:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
|
||||
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%AB = fmul <8 x double> %A, %B
|
||||
|
@ -116,8 +116,8 @@ define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x fl
|
|||
;
|
||||
; FMA4-LABEL: mul_addsub_ps512:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
|
||||
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%AB = fmul <16 x float> %A, %B
|
||||
|
@ -135,7 +135,7 @@ define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D,
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_addsub_ps128:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <4 x float> %C, %D
|
||||
|
@ -166,7 +166,7 @@ define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double>
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_addsub_pd128:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <2 x double> %C, %D
|
||||
|
@ -189,7 +189,7 @@ define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D,
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_addsub_ps256:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <8 x float> %C, %D
|
||||
|
@ -236,7 +236,7 @@ define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double>
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_addsub_pd256:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <4 x double> %C, %D
|
||||
|
@ -273,8 +273,8 @@ define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float>
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_addsub_ps512:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmaddsubps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddsubps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
|
||||
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <16 x float> %C, %D
|
||||
|
@ -359,8 +359,8 @@ define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double>
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_addsub_pd512:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmaddsubpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddsubpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm4
|
||||
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <8 x double> %C, %D
|
||||
|
@ -404,7 +404,7 @@ define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D,
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_subadd_ps128:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <4 x float> %C, %D
|
||||
|
@ -435,7 +435,7 @@ define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double>
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_subadd_pd128:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <2 x double> %C, %D
|
||||
|
@ -458,7 +458,7 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_subadd_ps256:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <8 x float> %C, %D
|
||||
|
@ -505,7 +505,7 @@ define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double>
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_subadd_pd256:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <4 x double> %C, %D
|
||||
|
@ -542,8 +542,8 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_subadd_ps512:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
|
||||
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <16 x float> %C, %D
|
||||
|
@ -628,8 +628,8 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
|
|||
;
|
||||
; FMA4-LABEL: buildvector_mul_subadd_pd512:
|
||||
; FMA4: # %bb.0: # %bb
|
||||
; FMA4-NEXT: vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
|
||||
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
|
||||
; FMA4-NEXT: retq
|
||||
bb:
|
||||
%A = fmul <8 x double> %C, %D
|
||||
|
|
|
@ -22,7 +22,7 @@ define float @f1(float %0, float %1, float %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f1:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg float %0
|
||||
|
@ -50,7 +50,7 @@ define double @f2(double %0, double %1, double %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f2:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg double %0
|
||||
|
@ -78,7 +78,7 @@ define float @f3(float %0, float %1, float %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f3:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg float %2
|
||||
|
@ -106,7 +106,7 @@ define double @f4(double %0, double %1, double %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f4:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg double %2
|
||||
|
@ -136,7 +136,7 @@ define float @f5(float %0, float %1, float %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f5:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg float %0
|
||||
|
@ -167,7 +167,7 @@ define double @f6(double %0, double %1, double %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f6:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg double %0
|
||||
|
@ -197,7 +197,7 @@ define float @f7(float %0, float %1, float %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f7:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
|
@ -234,7 +234,7 @@ define double @f8(double %0, double %1, double %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f8:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
|
@ -267,7 +267,7 @@ define float @f9(float %0, float %1, float %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f9:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
|
@ -309,7 +309,7 @@ define double @f10(double %0, double %1, double %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f10:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
|
@ -400,7 +400,7 @@ define float @f15() #0 {
|
|||
; FMA4-LABEL: f15:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; FMA4-NEXT: vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.fmuladd.f32(
|
||||
|
@ -432,7 +432,7 @@ define double @f16() #0 {
|
|||
; FMA4-LABEL: f16:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; FMA4-NEXT: vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.fmuladd.f64(
|
||||
|
@ -468,7 +468,7 @@ define float @f17() #0 {
|
|||
; FMA4-LABEL: f17:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; FMA4-NEXT: vfmaddss %xmm0, %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%result = call float @llvm.experimental.constrained.fma.f32(
|
||||
|
@ -504,7 +504,7 @@ define double @f18() #0 {
|
|||
; FMA4-LABEL: f18:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; FMA4-NEXT: vfmaddsd %xmm0, %xmm0, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%result = call double @llvm.experimental.constrained.fma.f64(
|
||||
|
@ -568,7 +568,7 @@ define <4 x float> @f19(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f19:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg <4 x float> %0
|
||||
|
@ -610,7 +610,7 @@ define <2 x double> @f20(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f20:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg <2 x double> %0
|
||||
|
@ -672,7 +672,7 @@ define <4 x float> @f21(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f21:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg <4 x float> %2
|
||||
|
@ -714,7 +714,7 @@ define <2 x double> @f22(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f22:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg <2 x double> %2
|
||||
|
@ -778,7 +778,7 @@ define <4 x float> @f23(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f23:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg <4 x float> %0
|
||||
|
@ -823,7 +823,7 @@ define <2 x double> @f24(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f24:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
%3 = fneg <2 x double> %0
|
||||
|
@ -887,7 +887,7 @@ define <4 x float> @f25(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f25:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
|
@ -938,7 +938,7 @@ define <2 x double> @f26(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f26:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
|
||||
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
|
@ -1005,7 +1005,7 @@ define <4 x float> @f27(<4 x float> %0, <4 x float> %1, <4 x float> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f27:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
|
@ -1061,7 +1061,7 @@ define <2 x double> @f28(<2 x double> %0, <2 x double> %1, <2 x double> %2) #0 {
|
|||
;
|
||||
; FMA4-LABEL: f28:
|
||||
; FMA4: # %bb.0: # %entry
|
||||
; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
|
||||
; FMA4-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
entry:
|
||||
|
|
|
@ -14,8 +14,8 @@ define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c)
|
|||
; CHECK-NEXT: .cfi_def_cfa_register %ebp
|
||||
; CHECK-NEXT: andl $-32, %esp
|
||||
; CHECK-NEXT: subl $32, %esp
|
||||
; CHECK-NEXT: vfmaddps 8(%ebp), %ymm2, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vfmaddps 40(%ebp), %ymm3, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + mem
|
||||
; CHECK-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + mem
|
||||
; CHECK-NEXT: movl %ebp, %esp
|
||||
; CHECK-NEXT: popl %ebp
|
||||
; CHECK-NEXT: retl
|
||||
|
|
Loading…
Reference in New Issue