forked from OSchip/llvm-project
[DAGCombiner] do not fold (fmul (fadd X, 1), Y) -> (fmad X, Y, Y) by default
Summary:
When X = 0 and Y = inf, the original code produces inf, but the transformed code produces nan. So this transform (and its relatives) should only be used when the no-infs-fp-math flag is explicitly enabled.

Also disable the transform using fmad (intermediate rounding) when unsafe-math is not enabled, since it can reduce the precision of the result; consider this example with binary floating point numbers with two bits of mantissa (below, "=r" marks a step where the value is rounded):

  x = 1.01
  y = 111
  x * (y + 1) = 1.01 * 1000 = 1010 (this is the exact result; no rounding occurs at any step)
  x * y + x = 1000.11 + 1.01 =r 1000 + 1.01 = 1001.01 =r 1000 (with rounding towards zero)

The example relies on rounding towards zero at least in the second step.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98578
Reviewers: RKSimon, tstellarAMD, spatel, arsenm
Subscribers: wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D26602
llvm-svn: 288506
This commit is contained in:
parent
9cb74267ac
commit
33ca182c91
|
@ -8392,17 +8392,23 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
|
|||
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
|
||||
|
||||
const TargetOptions &Options = DAG.getTarget().Options;
|
||||
bool AllowFusion =
|
||||
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
|
||||
|
||||
// Floating-point multiply-add with intermediate rounding.
|
||||
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
|
||||
// The transforms below are incorrect when x == 0 and y == inf, because the
|
||||
// intermediate multiplication produces a nan.
|
||||
if (!Options.NoInfsFPMath)
|
||||
return SDValue();
|
||||
|
||||
// Floating-point multiply-add without intermediate rounding.
|
||||
bool HasFMA =
|
||||
AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
|
||||
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
|
||||
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
|
||||
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
|
||||
|
||||
// Floating-point multiply-add with intermediate rounding. This can result
|
||||
// in a less precise result due to the changed rounding order.
|
||||
bool HasFMAD = Options.UnsafeFPMath &&
|
||||
(LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
|
||||
|
||||
// No valid opcode, do not combine.
|
||||
if (!HasFMAD && !HasFMA)
|
||||
return SDValue();
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be
|
||||
; beneficial even without fp32 denormals, but they do require no-infs-fp-math
|
||||
; for correctness.
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare double @llvm.fabs.f64(double) #0
|
||||
|
@ -369,7 +374,10 @@ define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %
|
|||
;
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y:
|
||||
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
|
||||
define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -382,7 +390,10 @@ define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one:
|
||||
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
|
||||
define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -395,7 +406,10 @@ define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y:
|
||||
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
|
||||
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
|
||||
define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -408,7 +422,10 @@ define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone:
|
||||
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
|
||||
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
|
||||
define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -421,7 +438,10 @@ define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y:
|
||||
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
|
||||
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
|
||||
define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -434,7 +454,10 @@ define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x:
|
||||
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
|
||||
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
|
||||
define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -447,7 +470,10 @@ define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y:
|
||||
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
|
||||
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
|
||||
define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -460,7 +486,10 @@ define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x:
|
||||
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
|
||||
; SI-NOFMA: v_sub_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
|
||||
define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -473,7 +502,10 @@ define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y:
|
||||
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
|
||||
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
|
||||
define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -486,7 +518,10 @@ define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one:
|
||||
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
|
||||
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], -1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
|
||||
define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -499,7 +534,10 @@ define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y:
|
||||
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
|
||||
define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -512,7 +550,10 @@ define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone:
|
||||
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_add_f32_e32 [[VS:v[0-9]]], 1.0, [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
|
||||
define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) {
|
||||
|
@ -529,8 +570,12 @@ define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
|
|||
;
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f32_interp:
|
||||
; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
|
||||
; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]]
|
||||
; SI-NOFMA: v_sub_f32_e32 [[VT1:v[0-9]]], 1.0, [[VT:v[0-9]]]
|
||||
; SI-NOFMA: v_mul_f32_e32 [[VTY:v[0-9]]], [[VT1]], [[VY:v[0-9]]]
|
||||
; SI-NOFMA: v_mac_f32_e32 [[VTY]], [[VT]], [[VX:v[0-9]]]
|
||||
;
|
||||
; SI-FMA: v_fma_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
|
||||
; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VT]], [[VR]]
|
||||
define void @test_f32_interp(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2,
|
||||
|
@ -547,8 +592,12 @@ define void @test_f32_interp(float addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}test_f64_interp:
|
||||
; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]]
|
||||
; SI: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
|
||||
; SI-NOFMA: v_add_f64 [[VT1:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], 1.0
|
||||
; SI-NOFMA: v_mul_f64 [[VTY:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VT1]]
|
||||
; SI-NOFMA: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VTY]]
|
||||
;
|
||||
; SI-FMA: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]]
|
||||
; SI-FMA: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
|
||||
define void @test_f64_interp(double addrspace(1)* %out,
|
||||
double addrspace(1)* %in1,
|
||||
double addrspace(1)* %in2,
|
||||
|
|
|
@ -560,17 +560,20 @@ define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <
|
|||
define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_add_x_one_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
|
||||
%m = fmul <4 x float> %a, %y
|
||||
|
@ -580,17 +583,20 @@ define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
|
|||
define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_y_add_x_one:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
|
||||
%m = fmul <4 x float> %y, %a
|
||||
|
@ -600,17 +606,20 @@ define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
|
|||
define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
|
||||
%m = fmul <4 x float> %a, %y
|
||||
|
@ -620,17 +629,20 @@ define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y
|
|||
define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
|
||||
%m = fmul <4 x float> %y, %a
|
||||
|
@ -640,17 +652,23 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y
|
|||
define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
%m = fmul <4 x float> %s, %y
|
||||
|
@ -660,17 +678,23 @@ define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
|
|||
define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
%m = fmul <4 x float> %y, %s
|
||||
|
@ -680,17 +704,23 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
|
|||
define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
|
||||
%m = fmul <4 x float> %s, %y
|
||||
|
@ -700,17 +730,23 @@ define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y
|
|||
define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||
; AVX512-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
|
||||
%m = fmul <4 x float> %y, %s
|
||||
|
@ -720,17 +756,20 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y
|
|||
define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
|
||||
%m = fmul <4 x float> %s, %y
|
||||
|
@ -740,17 +779,20 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
|
|||
define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
|
||||
%m = fmul <4 x float> %y, %s
|
||||
|
@ -760,17 +802,20 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
|
|||
define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
|
||||
%m = fmul <4 x float> %s, %y
|
||||
|
@ -780,17 +825,20 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y
|
|||
define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
|
||||
; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; FMA-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
|
||||
; FMA4-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; FMA4-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
|
||||
%m = fmul <4 x float> %y, %s
|
||||
|
@ -804,19 +852,25 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y
|
|||
define float @test_f32_interp(float %x, float %y, float %t) {
|
||||
; FMA-LABEL: test_f32_interp:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
|
||||
; FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; FMA-NEXT: vsubss %xmm2, %xmm3, %xmm3
|
||||
; FMA-NEXT: vmulss %xmm3, %xmm1, %xmm1
|
||||
; FMA-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_f32_interp:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
|
||||
; FMA4-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; FMA4-NEXT: vsubss %xmm2, %xmm3, %xmm3
|
||||
; FMA4-NEXT: vmulss %xmm3, %xmm1, %xmm1
|
||||
; FMA4-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f32_interp:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
|
||||
; AVX512-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
|
||||
; AVX512-NEXT: vsubss %xmm2, %xmm3, %xmm3
|
||||
; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t1 = fsub float 1.0, %t
|
||||
|
@ -829,19 +883,25 @@ define float @test_f32_interp(float %x, float %y, float %t) {
|
|||
define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
|
||||
; FMA-LABEL: test_v4f32_interp:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1
|
||||
; FMA-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubps %xmm2, %xmm3, %xmm3
|
||||
; FMA-NEXT: vmulps %xmm3, %xmm1, %xmm1
|
||||
; FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f32_interp:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %xmm2, %xmm3, %xmm3
|
||||
; FMA4-NEXT: vmulps %xmm3, %xmm1, %xmm1
|
||||
; FMA4-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f32_interp:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm3
|
||||
; AVX512-NEXT: vsubps %xmm2, %xmm3, %xmm3
|
||||
; AVX512-NEXT: vmulps %xmm3, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
|
@ -854,19 +914,25 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
|
|||
define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
|
||||
; FMA-LABEL: test_v8f32_interp:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1
|
||||
; FMA-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubps %ymm2, %ymm3, %ymm3
|
||||
; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v8f32_interp:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %ymm2, %ymm3, %ymm3
|
||||
; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f32_interp:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
|
||||
; AVX512-NEXT: vsubps %ymm2, %ymm3, %ymm3
|
||||
; AVX512-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; AVX512-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
|
@ -879,19 +945,25 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
|
|||
define double @test_f64_interp(double %x, double %y, double %t) {
|
||||
; FMA-LABEL: test_f64_interp:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
|
||||
; FMA-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
||||
; FMA-NEXT: vsubsd %xmm2, %xmm3, %xmm3
|
||||
; FMA-NEXT: vmulsd %xmm3, %xmm1, %xmm1
|
||||
; FMA-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_f64_interp:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
|
||||
; FMA4-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
||||
; FMA4-NEXT: vsubsd %xmm2, %xmm3, %xmm3
|
||||
; FMA4-NEXT: vmulsd %xmm3, %xmm1, %xmm1
|
||||
; FMA4-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_f64_interp:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
|
||||
; AVX512-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
|
||||
; AVX512-NEXT: vsubsd %xmm2, %xmm3, %xmm3
|
||||
; AVX512-NEXT: vmulsd %xmm3, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t1 = fsub double 1.0, %t
|
||||
|
@ -904,19 +976,25 @@ define double @test_f64_interp(double %x, double %y, double %t) {
|
|||
define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
|
||||
; FMA-LABEL: test_v2f64_interp:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1
|
||||
; FMA-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
||||
; FMA-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
||||
; FMA-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v2f64_interp:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
||||
; FMA4-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
||||
; FMA4-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v2f64_interp:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1
|
||||
; AVX512-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
|
||||
; AVX512-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
||||
; AVX512-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
|
||||
|
@ -929,19 +1007,25 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
|
|||
define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
|
||||
; FMA-LABEL: test_v4f64_interp:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1
|
||||
; FMA-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubpd %ymm2, %ymm3, %ymm3
|
||||
; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
||||
; FMA-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v4f64_interp:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubpd %ymm2, %ymm3, %ymm3
|
||||
; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v4f64_interp:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1
|
||||
; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %ymm3
|
||||
; AVX512-NEXT: vsubpd %ymm2, %ymm3, %ymm3
|
||||
; AVX512-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
||||
; AVX512-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
|
||||
|
|
|
@ -255,19 +255,26 @@ define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <
|
|||
define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
|
||||
; FMA-LABEL: test_v16f32_mul_add_x_one_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmadd213ps %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfmadd213ps %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||
; FMA-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v16f32_mul_add_x_one_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_mul_add_x_one_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmadd213ps %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
|
||||
%m = fmul <16 x float> %a, %y
|
||||
|
@ -277,19 +284,26 @@ define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %
|
|||
define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
|
||||
; FMA-LABEL: test_v8f64_mul_y_add_x_one:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmadd213pd %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfmadd213pd %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||
; FMA-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v8f64_mul_y_add_x_one:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_mul_y_add_x_one:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmadd213pd %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
|
||||
%m = fmul <8 x double> %y, %a
|
||||
|
@ -299,19 +313,26 @@ define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y
|
|||
define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
|
||||
; FMA-LABEL: test_v16f32_mul_add_x_negone_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmsub213ps %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfmsub213ps %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||
; FMA-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v16f32_mul_add_x_negone_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA4-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_mul_add_x_negone_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmsub213ps %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
|
||||
%m = fmul <16 x float> %a, %y
|
||||
|
@ -321,19 +342,26 @@ define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float
|
|||
define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
|
||||
; FMA-LABEL: test_v8f64_mul_y_add_x_negone:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmsub213pd %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfmsub213pd %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||
; FMA-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v8f64_mul_y_add_x_negone:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA4-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_mul_y_add_x_negone:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmsub213pd %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
|
||||
%m = fmul <8 x double> %y, %a
|
||||
|
@ -343,19 +371,27 @@ define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double>
|
|||
define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
|
||||
; FMA-LABEL: test_v16f32_mul_sub_one_x_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213ps %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfnmadd213ps %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
||||
; FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v16f32_mul_sub_one_x_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
||||
; FMA4-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_mul_sub_one_x_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213ps %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %zmm2
|
||||
; AVX512-NEXT: vsubps %zmm0, %zmm2, %zmm0
|
||||
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||
%m = fmul <16 x float> %s, %y
|
||||
|
@ -365,19 +401,27 @@ define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %
|
|||
define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
|
||||
; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213pd %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfnmadd213pd %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
||||
; FMA-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
||||
; FMA4-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %zmm2
|
||||
; AVX512-NEXT: vsubpd %zmm0, %zmm2, %zmm0
|
||||
; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
|
||||
%m = fmul <8 x double> %y, %s
|
||||
|
@ -387,19 +431,27 @@ define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y
|
|||
define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
|
||||
; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmsub213ps %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfnmsub213ps %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
||||
; FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
||||
; FMA4-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmsub213ps %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %zmm2
|
||||
; AVX512-NEXT: vsubps %zmm0, %zmm2, %zmm0
|
||||
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0,float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
|
||||
%m = fmul <16 x float> %s, %y
|
||||
|
@ -409,19 +461,27 @@ define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float
|
|||
define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
|
||||
; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmsub213pd %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfnmsub213pd %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
||||
; FMA-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA4-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
||||
; FMA4-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmsub213pd %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %zmm2
|
||||
; AVX512-NEXT: vsubpd %zmm0, %zmm2, %zmm0
|
||||
; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
|
||||
%m = fmul <8 x double> %y, %s
|
||||
|
@ -431,19 +491,26 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double>
|
|||
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
|
||||
; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmsub213ps %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfmsub213ps %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||
; FMA-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmsub213ps %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
|
||||
%m = fmul <16 x float> %s, %y
|
||||
|
@ -453,19 +520,26 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %
|
|||
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
|
||||
; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmsub213pd %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfmsub213pd %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||
; FMA-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmsub213pd %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
|
||||
%m = fmul <8 x double> %y, %s
|
||||
|
@ -475,19 +549,26 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y
|
|||
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
|
||||
; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmadd213ps %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfmadd213ps %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||
; FMA-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmadd213ps %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
|
||||
%m = fmul <16 x float> %s, %y
|
||||
|
@ -497,19 +578,26 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float
|
|||
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
|
||||
; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfmadd213pd %ymm2, %ymm2, %ymm0
|
||||
; FMA-NEXT: vfmadd213pd %ymm3, %ymm3, %ymm1
|
||||
; FMA-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||
; FMA-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
||||
; FMA4-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||
; FMA4-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||
; FMA4-NEXT: vmulpd %ymm1, %ymm3, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfmadd213pd %zmm1, %zmm1, %zmm0
|
||||
; AVX512-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
|
||||
%m = fmul <8 x double> %y, %s
|
||||
|
@ -523,23 +611,31 @@ define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double>
|
|||
define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
|
||||
; FMA-LABEL: test_v16f32_interp:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213ps %ymm3, %ymm5, %ymm3
|
||||
; FMA-NEXT: vfnmadd213ps %ymm2, %ymm4, %ymm2
|
||||
; FMA-NEXT: vmovaps {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubps %ymm4, %ymm6, %ymm7
|
||||
; FMA-NEXT: vsubps %ymm5, %ymm6, %ymm6
|
||||
; FMA-NEXT: vmulps %ymm6, %ymm3, %ymm3
|
||||
; FMA-NEXT: vmulps %ymm7, %ymm2, %ymm2
|
||||
; FMA-NEXT: vfmadd213ps %ymm2, %ymm4, %ymm0
|
||||
; FMA-NEXT: vfmadd213ps %ymm3, %ymm5, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v16f32_interp:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
|
||||
; FMA4-NEXT: vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
|
||||
; FMA4-NEXT: vmovaps {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubps %ymm4, %ymm6, %ymm7
|
||||
; FMA4-NEXT: vsubps %ymm5, %ymm6, %ymm6
|
||||
; FMA4-NEXT: vmulps %ymm6, %ymm3, %ymm3
|
||||
; FMA4-NEXT: vmulps %ymm7, %ymm2, %ymm2
|
||||
; FMA4-NEXT: vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16f32_interp:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213ps %zmm1, %zmm2, %zmm1
|
||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %zmm3
|
||||
; AVX512-NEXT: vsubps %zmm2, %zmm3, %zmm3
|
||||
; AVX512-NEXT: vmulps %zmm3, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vfmadd213ps %zmm1, %zmm2, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
|
@ -552,23 +648,31 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
|
|||
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
|
||||
; FMA-LABEL: test_v8f64_interp:
|
||||
; FMA: # BB#0:
|
||||
; FMA-NEXT: vfnmadd213pd %ymm3, %ymm5, %ymm3
|
||||
; FMA-NEXT: vfnmadd213pd %ymm2, %ymm4, %ymm2
|
||||
; FMA-NEXT: vmovapd {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA-NEXT: vsubpd %ymm4, %ymm6, %ymm7
|
||||
; FMA-NEXT: vsubpd %ymm5, %ymm6, %ymm6
|
||||
; FMA-NEXT: vmulpd %ymm6, %ymm3, %ymm3
|
||||
; FMA-NEXT: vmulpd %ymm7, %ymm2, %ymm2
|
||||
; FMA-NEXT: vfmadd213pd %ymm2, %ymm4, %ymm0
|
||||
; FMA-NEXT: vfmadd213pd %ymm3, %ymm5, %ymm1
|
||||
; FMA-NEXT: retq
|
||||
;
|
||||
; FMA4-LABEL: test_v8f64_interp:
|
||||
; FMA4: # BB#0:
|
||||
; FMA4-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
|
||||
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
|
||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
||||
; FMA4-NEXT: vsubpd %ymm4, %ymm6, %ymm7
|
||||
; FMA4-NEXT: vsubpd %ymm5, %ymm6, %ymm6
|
||||
; FMA4-NEXT: vmulpd %ymm6, %ymm3, %ymm3
|
||||
; FMA4-NEXT: vmulpd %ymm7, %ymm2, %ymm2
|
||||
; FMA4-NEXT: vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
|
||||
; FMA4-NEXT: vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
|
||||
; FMA4-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v8f64_interp:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm2, %zmm1
|
||||
; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %zmm3
|
||||
; AVX512-NEXT: vsubpd %zmm2, %zmm3, %zmm3
|
||||
; AVX512-NEXT: vmulpd %zmm3, %zmm1, %zmm1
|
||||
; AVX512-NEXT: vfmadd213pd %zmm1, %zmm2, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
|
||||
|
|
Loading…
Reference in New Issue