forked from OSchip/llvm-project
Migrate some more fadd and fsub cases away from UnsafeFPMath control to utilize NoSignedZerosFPMath options control
Summary: Honoring no signed zeros is also available as a separate user control through clang, independent of any fast-math or UnsafeFPMath context, so the DAG guards should reflect this. Reviewers: spatel, arsenm, hfinkel, wristow, craig.topper Reviewed By: spatel Subscribers: rampitec, foad, nhaehnle, wuzish, nemanjai, jvesely, wdng, javed.absar, MaskRay, jsji Differential Revision: https://reviews.llvm.org/D65170 llvm-svn: 367486
This commit is contained in:
parent
f8e7b53657
commit
005d705d43
|
@ -839,7 +839,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
|
|||
});
|
||||
}
|
||||
case ISD::FADD:
|
||||
if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
|
||||
if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
|
||||
return 0;
|
||||
|
||||
// After operation legalization, it might not be legal to create new FSUBs.
|
||||
|
@ -912,7 +912,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
|
||||
}
|
||||
case ISD::FADD:
|
||||
assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
|
||||
assert(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros());
|
||||
|
||||
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
|
||||
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
|
||||
|
@ -12017,7 +12017,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
|
|||
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
|
||||
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
|
||||
if (N1C && N1C->isZero())
|
||||
if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
|
||||
if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
|
||||
return N0;
|
||||
|
||||
if (SDValue NewSel = foldBinOpIntoSelect(N))
|
||||
|
@ -12075,7 +12075,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
|
|||
// If 'unsafe math' or reassoc and nsz, fold lots of things.
|
||||
// TODO: break out portions of the transformations below for which Unsafe is
|
||||
// considered and which do not require both nsz and reassoc
|
||||
if ((Options.UnsafeFPMath ||
|
||||
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
|
||||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
|
||||
AllowNewConst) {
|
||||
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
|
||||
|
@ -12194,7 +12194,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
|
|||
|
||||
// (fsub A, 0) -> A
|
||||
if (N1CFP && N1CFP->isZero()) {
|
||||
if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
|
||||
if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
|
||||
Flags.hasNoSignedZeros()) {
|
||||
return N0;
|
||||
}
|
||||
|
@ -12221,7 +12221,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
|
|||
}
|
||||
}
|
||||
|
||||
if ((Options.UnsafeFPMath ||
|
||||
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
|
||||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
|
||||
&& N1.getOpcode() == ISD::FADD) {
|
||||
// X - (X + Y) -> -Y
|
||||
|
|
|
@ -4630,7 +4630,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
|||
return getUNDEF(VT);
|
||||
|
||||
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
|
||||
if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
|
||||
if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
|
||||
OpOpcode == ISD::FSUB)
|
||||
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
|
||||
Operand.getOperand(0), Flags);
|
||||
|
|
|
@ -146,28 +146,23 @@ define float @fadd_const_multiuse_fmf(float %x) {
|
|||
ret float %a3
|
||||
}
|
||||
|
||||
; DAGCombiner transforms this into: (x + 59.0) + (x + 17.0).
|
||||
; The machine combiner transforms this into a chain of 3 dependent adds:
|
||||
; ((x + 59.0) + 17.0) + x
|
||||
|
||||
define float @fadd_const_multiuse_attr(float %x) #0 {
|
||||
; DAGCombiner transforms this into: (x + 17.0) + (x + 59.0).
|
||||
define float @fadd_const_multiuse_attr(float %x) {
|
||||
; CHECK-LABEL: fadd_const_multiuse_attr:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
|
||||
; CHECK-DAG: mov [[W17:w[0-9]+]], #1109917696
|
||||
; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]]
|
||||
; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144
|
||||
; CHECK-NEXT: fmov [[FP17:s[0-9]+]], [[W17]]
|
||||
; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP59]]
|
||||
; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], [[FP17]], [[TMP1]]
|
||||
; CHECK-NEXT: fadd s0, s0, [[TMP2]]
|
||||
; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]]
|
||||
; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP17]]
|
||||
; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]]
|
||||
; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: ret
|
||||
%a1 = fadd float %x, 42.0
|
||||
%a2 = fadd float %a1, 17.0
|
||||
%a3 = fadd float %a1, %a2
|
||||
%a1 = fadd fast float %x, 42.0
|
||||
%a2 = fadd fast float %a1, 17.0
|
||||
%a3 = fadd fast float %a1, %a2
|
||||
ret float %a3
|
||||
}
|
||||
|
||||
attributes #0 = { "unsafe-fp-math"="true" }
|
||||
|
||||
declare void @use(double)
|
||||
|
||||
|
|
|
@ -1,17 +1,29 @@
|
|||
; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s
|
||||
; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
|
||||
; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck --check-prefixes=GCN,GCN-FMF,GCN-SAFE %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
||||
; Test that the -enable-no-signed-zeros-fp-math flag works
|
||||
|
||||
; GCN-LABEL: {{^}}fneg_fsub_f32:
|
||||
; GCN-LABEL: {{^}}fneg_fsub_f32_fmf:
|
||||
; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; GCN-FMF-NOT: xor
|
||||
define amdgpu_kernel void @fneg_fsub_f32_fmf(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%add = add i32 %tid, 1
|
||||
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
%b_ptr = getelementptr float, float addrspace(1)* %in, i32 %add
|
||||
%a = load float, float addrspace(1)* %gep, align 4
|
||||
%b = load float, float addrspace(1)* %b_ptr, align 4
|
||||
%result = fsub fast float %a, %b
|
||||
%neg.result = fsub fast float -0.0, %result
|
||||
store float %neg.result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fneg_fsub_f32_safe:
|
||||
; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
|
||||
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
|
||||
|
||||
; GCN-UNSAFE-NOT: xor
|
||||
define amdgpu_kernel void @fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
define amdgpu_kernel void @fneg_fsub_f32_safe(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%add = add i32 %tid, 1
|
||||
%gep = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.fabs.f64(double %Val)
|
||||
declare double @llvm.floor.f64(double) nounwind readnone
|
||||
|
@ -20,7 +20,7 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
|
|||
; SI: v_add_f64
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
|
||||
%y = call double @llvm.floor.f64(double %x) nounwind readnone
|
||||
%y = call fast double @llvm.floor.f64(double %x) nounwind readnone
|
||||
store double %y, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -35,8 +35,8 @@ define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
|
|||
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
|
||||
%neg = fsub double 0.0, %x
|
||||
%y = call double @llvm.floor.f64(double %neg) nounwind readnone
|
||||
%neg = fsub nsz double 0.0, %x
|
||||
%y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
|
||||
store double %y, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -51,9 +51,9 @@ define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x)
|
|||
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
|
||||
; SI: s_endpgm
|
||||
define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
|
||||
%abs = call double @llvm.fabs.f64(double %x)
|
||||
%neg = fsub double 0.0, %abs
|
||||
%y = call double @llvm.floor.f64(double %neg) nounwind readnone
|
||||
%abs = call fast double @llvm.fabs.f64(double %x)
|
||||
%neg = fsub nsz double 0.0, %abs
|
||||
%y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
|
||||
store double %y, double addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double
|
|||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
|
||||
%y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
|
||||
%y = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
|
||||
store <2 x double> %y, <2 x double> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -73,7 +73,7 @@ define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x do
|
|||
; CI: v_floor_f64_e32
|
||||
; CI-NOT: v_floor_f64_e32
|
||||
define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
|
||||
%y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
|
||||
%y = call fast <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
|
||||
store <3 x double> %y, <3 x double> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -84,7 +84,7 @@ define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x do
|
|||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
|
||||
%y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
|
||||
%y = call fast <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
|
||||
store <4 x double> %y, <4 x double> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -99,7 +99,7 @@ define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x do
|
|||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
|
||||
%y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
|
||||
%y = call fast <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
|
||||
store <8 x double> %y, <8 x double> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -122,7 +122,7 @@ define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x do
|
|||
; CI: v_floor_f64_e32
|
||||
; CI: v_floor_f64_e32
|
||||
define amdgpu_kernel void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
|
||||
%y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
|
||||
%y = call fast <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
|
||||
store <16 x double> %y, <16 x double> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -219,8 +219,11 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %
|
|||
; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
|
||||
; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
|
||||
; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
|
||||
; GCN-NSZ-DAG: v_mac_f32_e32 [[C:v[0-9]+]],
|
||||
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[C]]
|
||||
; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
|
||||
; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
|
||||
; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]],
|
||||
; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]],
|
||||
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
|
||||
|
||||
define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
|
||||
.entry:
|
||||
|
|
|
@ -3,19 +3,26 @@
|
|||
; same as the FMA target register. The second one is legal. The third
|
||||
; one doesn't fit the feeding-copy pattern.
|
||||
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=+vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
declare double @llvm.sqrt.f64(double)
|
||||
|
||||
define double @foo3(double %a) nounwind {
|
||||
%r = call double @llvm.sqrt.f64(double %a)
|
||||
ret double %r
|
||||
|
||||
; CHECK: @foo3
|
||||
define double @foo3_fmf(double %a) nounwind {
|
||||
; CHECK: @foo3_fmf
|
||||
; CHECK-NOT: fmr
|
||||
; CHECK: xsmaddmdp
|
||||
; CHECK: xsmaddadp
|
||||
%r = call fast double @llvm.sqrt.f64(double %a)
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define double @foo3_safe(double %a) nounwind {
|
||||
; CHECK: @foo3_safe
|
||||
; CHECK-NOT: fmr
|
||||
; CHECK: xssqrtdp
|
||||
%r = call double @llvm.sqrt.f64(double %a)
|
||||
ret double %r
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
|
||||
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
|
||||
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
|
||||
; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBAL
|
||||
; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
|
||||
|
||||
; Test FP transforms using instruction/node-level fast-math-flags.
|
||||
; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
|
||||
|
|
|
@ -1,96 +1,103 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
|
||||
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
||||
|
||||
define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
define <4 x double> @foo_fmf(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: @foo_fmf
|
||||
; CHECK: qvfrsqrte
|
||||
; CHECK-DAG: qvfmul
|
||||
; CHECK-DAG: qvfmsub
|
||||
; CHECK-DAG: qvfnmsub
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfnmsub
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfmul
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%x = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
|
||||
%r = fdiv fast <4 x double> %a, %x
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
define <4 x double> @foo_safe(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: @foo_safe
|
||||
; CHECK: fsqrt
|
||||
; CHECK: fdiv
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
|
||||
%r = fdiv <4 x double> %a, %x
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK: qvfrsqrte
|
||||
; CHECK-DAG: qvfmul
|
||||
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
|
||||
; an qvfmadd instead of a qvfnmsub
|
||||
; CHECK-DAG: qvfmadd
|
||||
; CHECK-DAG: qvfmadd
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfmadd
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfmul
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE-LABEL: @foo
|
||||
; CHECK-SAFE: fsqrt
|
||||
; CHECK-SAFE: fdiv
|
||||
; CHECK-SAFE: blr
|
||||
}
|
||||
|
||||
define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind {
|
||||
define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: @foof_fmf
|
||||
; CHECK: qvfrsqrtes
|
||||
; CHECK-DAG: qvfmuls
|
||||
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
|
||||
; an qvfmadd instead of a qvfnmsubs
|
||||
; CHECK-DAG: qvfmadds
|
||||
; CHECK-DAG: qvfmadds
|
||||
; CHECK: qvfmuls
|
||||
; CHECK: qvfmul
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%y = fpext <4 x float> %x to <4 x double>
|
||||
%r = fdiv fast <4 x double> %a, %y
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
define <4 x double> @foof_safe(<4 x double> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: @foof_safe
|
||||
; CHECK: fsqrts
|
||||
; CHECK: fdiv
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%y = fpext <4 x float> %x to <4 x double>
|
||||
%r = fdiv <4 x double> %a, %y
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: @foof
|
||||
; CHECK: qvfrsqrtes
|
||||
; CHECK-DAG: qvfmuls
|
||||
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
|
||||
; an qvfmadd instead of a qvfnmsubs
|
||||
; CHECK-DAG: qvfmadds
|
||||
; CHECK-DAG: qvfmadds
|
||||
; CHECK: qvfmuls
|
||||
; CHECK: qvfmul
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE-LABEL: @foof
|
||||
; CHECK-SAFE: fsqrts
|
||||
; CHECK-SAFE: fdiv
|
||||
; CHECK-SAFE: blr
|
||||
}
|
||||
|
||||
define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind {
|
||||
define <4 x float> @food_fmf(<4 x float> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: @food_fmf
|
||||
; CHECK: qvfrsqrte
|
||||
; CHECK-DAG: qvfmul
|
||||
; CHECK-DAG: qvfmsub
|
||||
; CHECK-DAG: qvfnmsub
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfnmsub
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfrsp
|
||||
; CHECK: qvfmuls
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%x = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
|
||||
%y = fptrunc <4 x double> %x to <4 x float>
|
||||
%r = fdiv fast <4 x float> %a, %y
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define <4 x float> @food_safe(<4 x float> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: @food_safe
|
||||
; CHECK: fsqrt
|
||||
; CHECK: fdivs
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
|
||||
%y = fptrunc <4 x double> %x to <4 x float>
|
||||
%r = fdiv <4 x float> %a, %y
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: @food
|
||||
; CHECK: qvfrsqrte
|
||||
; CHECK-DAG: qvfmul
|
||||
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
|
||||
; an qvfmadd instead of a qvfnmsub
|
||||
; CHECK-DAG: qvfmadd
|
||||
; CHECK-DAG: qvfmadd
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfmadd
|
||||
; CHECK: qvfmul
|
||||
; CHECK: qvfrsp
|
||||
; CHECK: qvfmuls
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE-LABEL: @food
|
||||
; CHECK-SAFE: fsqrt
|
||||
; CHECK-SAFE: fdivs
|
||||
; CHECK-SAFE: blr
|
||||
}
|
||||
|
||||
define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
entry:
|
||||
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%r = fdiv <4 x float> %a, %x
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: @goo
|
||||
define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: @goo_fmf
|
||||
; CHECK: qvfrsqrtes
|
||||
; CHECK-DAG: qvfmuls
|
||||
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
|
||||
|
@ -100,19 +107,25 @@ entry:
|
|||
; CHECK: qvfmuls
|
||||
; CHECK: qvfmuls
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE-LABEL: @goo
|
||||
; CHECK-SAFE: fsqrts
|
||||
; CHECK-SAFE: fdivs
|
||||
; CHECK-SAFE: blr
|
||||
entry:
|
||||
%x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%r = fdiv fast <4 x float> %a, %x
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
define <4 x float> @goo_safe(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: @goo_safe
|
||||
; CHECK: fsqrts
|
||||
; CHECK: fdivs
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%r = fdiv <4 x double> %a, %b
|
||||
ret <4 x double> %r
|
||||
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%r = fdiv <4 x float> %a, %x
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @foo2
|
||||
define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: @foo2_fmf
|
||||
; CHECK: qvfre
|
||||
; CHECK: qvfnmsub
|
||||
; CHECK: qvfmadd
|
||||
|
@ -120,61 +133,70 @@ entry:
|
|||
; CHECK: qvfmadd
|
||||
; CHECK: qvfmul
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE-LABEL: @foo2
|
||||
; CHECK-SAFE: fdiv
|
||||
; CHECK-SAFE: blr
|
||||
entry:
|
||||
%r = fdiv fast <4 x double> %a, %b
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
entry:
|
||||
%r = fdiv <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
define <4 x double> @foo2_safe(<4 x double> %a, <4 x double> %b) nounwind {
|
||||
; CHECK-LABEL: @foo2_safe
|
||||
; CHECK: fdiv
|
||||
; CHECK: blr
|
||||
%r = fdiv <4 x double> %a, %b
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @goo2
|
||||
define <4 x float> @goo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: @goo2_fmf
|
||||
; CHECK: qvfres
|
||||
; CHECK: qvfnmsubs
|
||||
; CHECK: qvfmadds
|
||||
; CHECK: qvfmuls
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE-LABEL: @goo2
|
||||
; CHECK-SAFE: fdivs
|
||||
; CHECK-SAFE: blr
|
||||
entry:
|
||||
%r = fdiv fast <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define <4 x double> @foo3(<4 x double> %a) nounwind {
|
||||
define <4 x float> @goo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK-LABEL: @goo2_safe
|
||||
; CHECK: fdivs
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
|
||||
ret <4 x double> %r
|
||||
%r = fdiv <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @foo3
|
||||
define <4 x double> @foo3_fmf(<4 x double> %a) nounwind {
|
||||
; CHECK-LABEL: @foo3_fmf
|
||||
; CHECK: qvfrsqrte
|
||||
; CHECK: qvfmul
|
||||
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
|
||||
; an qvfmadd instead of a qvfnmsub
|
||||
; CHECK-DAG: qvfmadd
|
||||
; CHECK-DAG: qvfmsub
|
||||
; CHECK-DAG: qvfcmpeq
|
||||
; CHECK-DAG: qvfmadd
|
||||
; CHECK-DAG: qvfnmsub
|
||||
; CHECK-DAG: qvfmul
|
||||
; CHECK-DAG: qvfmul
|
||||
; CHECK-DAG: qvfmadd
|
||||
; CHECK-DAG: qvfnmsub
|
||||
; CHECK-DAG: qvfmul
|
||||
; CHECK-DAG: qvfmul
|
||||
; CHECK: qvfsel
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE-LABEL: @foo3
|
||||
; CHECK-SAFE: fsqrt
|
||||
; CHECK-SAFE: blr
|
||||
entry:
|
||||
%r = call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
define <4 x float> @goo3(<4 x float> %a) nounwind {
|
||||
define <4 x double> @foo3_safe(<4 x double> %a) nounwind {
|
||||
; CHECK-LABEL: @foo3_safe
|
||||
; CHECK: fsqrt
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
%r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @goo3
|
||||
define <4 x float> @goo3_fmf(<4 x float> %a) nounwind {
|
||||
; CHECK-LABEL: @goo3_fmf
|
||||
; CHECK: qvfrsqrtes
|
||||
; CHECK: qvfmuls
|
||||
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
|
||||
|
@ -186,9 +208,17 @@ entry:
|
|||
; CHECK-DAG: qvfmuls
|
||||
; CHECK: qvfsel
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE-LABEL: @goo3
|
||||
; CHECK-SAFE: fsqrts
|
||||
; CHECK-SAFE: blr
|
||||
entry:
|
||||
%r = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define <4 x float> @goo3_safe(<4 x float> %a) nounwind {
|
||||
; CHECK-LABEL: @goo3_safe
|
||||
; CHECK: fsqrts
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
@ -8,12 +7,8 @@ declare double @llvm.sqrt.f64(double)
|
|||
declare float @llvm.sqrt.f32(float)
|
||||
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
||||
|
||||
define double @foo(double %a, double %b) nounwind {
|
||||
%x = call double @llvm.sqrt.f64(double %b)
|
||||
%r = fdiv double %a, %x
|
||||
ret double %r
|
||||
|
||||
; CHECK: @foo
|
||||
define double @foo_fmf(double %a, double %b) nounwind {
|
||||
; CHECK: @foo_fmf
|
||||
; CHECK: frsqrte
|
||||
; CHECK: fmul
|
||||
; CHECK-NEXT: fmadd
|
||||
|
@ -25,34 +20,35 @@ define double @foo(double %a, double %b) nounwind {
|
|||
; CHECK-NEXT: fmul
|
||||
; CHECK-NEXT: fmul
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE: @foo
|
||||
; CHECK-SAFE: fsqrt
|
||||
; CHECK-SAFE: fdiv
|
||||
; CHECK-SAFE: blr
|
||||
%x = call fast double @llvm.sqrt.f64(double %b)
|
||||
%r = fdiv fast double %a, %x
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define double @no_estimate_refinement_f64(double %a, double %b) #0 {
|
||||
define double @foo_safe(double %a, double %b) nounwind {
|
||||
; CHECK: @foo_safe
|
||||
; CHECK: fsqrt
|
||||
; CHECK: fdiv
|
||||
; CHECK: blr
|
||||
%x = call double @llvm.sqrt.f64(double %b)
|
||||
%r = fdiv double %a, %x
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define double @no_estimate_refinement_f64(double %a, double %b) #0 {
|
||||
; CHECK-LABEL: @no_estimate_refinement_f64
|
||||
; CHECK: frsqrte
|
||||
; CHECK-NOT: fmadd
|
||||
; CHECK: fmul
|
||||
; CHECK-NOT: fmadd
|
||||
; CHECK: blr
|
||||
%x = call fast double @llvm.sqrt.f64(double %b)
|
||||
%r = fdiv fast double %a, %x
|
||||
ret double %r
|
||||
}
|
||||
|
||||
|
||||
define double @foof(double %a, float %b) nounwind {
|
||||
%x = call float @llvm.sqrt.f32(float %b)
|
||||
%y = fpext float %x to double
|
||||
%r = fdiv double %a, %y
|
||||
ret double %r
|
||||
|
||||
; CHECK: @foof
|
||||
define double @foof_fmf(double %a, float %b) nounwind {
|
||||
; CHECK: @foof_fmf
|
||||
; CHECK-DAG: frsqrtes
|
||||
; CHECK: fmuls
|
||||
; CHECK-NEXT: fmadds
|
||||
|
@ -60,20 +56,25 @@ define double @foof(double %a, float %b) nounwind {
|
|||
; CHECK-NEXT: fmuls
|
||||
; CHECK-NEXT: fmul
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-SAFE: @foof
|
||||
; CHECK-SAFE: fsqrts
|
||||
; CHECK-SAFE: fdiv
|
||||
; CHECK-SAFE: blr
|
||||
%x = call fast float @llvm.sqrt.f32(float %b)
|
||||
%y = fpext float %x to double
|
||||
%r = fdiv fast double %a, %y
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define float @food(float %a, double %b) nounwind {
|
||||
%x = call double @llvm.sqrt.f64(double %b)
|
||||
%y = fptrunc double %x to float
|
||||
%r = fdiv float %a, %y
|
||||
ret float %r
|
||||
define double @foof_safe(double %a, float %b) nounwind {
|
||||
; CHECK: @foof_safe
|
||||
; CHECK: fsqrts
|
||||
; CHECK: fdiv
|
||||
; CHECK: blr
|
||||
%x = call float @llvm.sqrt.f32(float %b)
|
||||
%y = fpext float %x to double
|
||||
%r = fdiv double %a, %y
|
||||
ret double %r
|
||||
}
|
||||
|
||||
; CHECK: @foo
|
||||
define float @food_fmf(float %a, double %b) nounwind {
|
||||
; CHECK: @food_fmf
|
||||
; CHECK-DAG: frsqrte
|
||||
; CHECK: fmul
|
||||
; CHECK-NEXT: fmadd
|
||||
|
@ -86,19 +87,25 @@ define float @food(float %a, double %b) nounwind {
|
|||
; CHECK-NEXT: frsp
|
||||
; CHECK-NEXT: fmuls
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-SAFE: @foo
|
||||
; CHECK-SAFE: fsqrt
|
||||
; CHECK-SAFE: fdivs
|
||||
; CHECK-SAFE: blr
|
||||
%x = call fast double @llvm.sqrt.f64(double %b)
|
||||
%y = fptrunc double %x to float
|
||||
%r = fdiv fast float %a, %y
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @goo(float %a, float %b) nounwind {
|
||||
%x = call float @llvm.sqrt.f32(float %b)
|
||||
%r = fdiv float %a, %x
|
||||
define float @food_safe(float %a, double %b) nounwind {
|
||||
; CHECK: @food_safe
|
||||
; CHECK: fsqrt
|
||||
; CHECK: fdivs
|
||||
; CHECK: blr
|
||||
%x = call double @llvm.sqrt.f64(double %b)
|
||||
%y = fptrunc double %x to float
|
||||
%r = fdiv float %a, %y
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; CHECK: @goo
|
||||
define float @goo_fmf(float %a, float %b) nounwind {
|
||||
; CHECK: @goo_fmf
|
||||
; CHECK-DAG: frsqrtes
|
||||
; CHECK: fmuls
|
||||
; CHECK-NEXT: fmadds
|
||||
|
@ -106,36 +113,37 @@ define float @goo(float %a, float %b) nounwind {
|
|||
; CHECK-NEXT: fmuls
|
||||
; CHECK-NEXT: fmuls
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-SAFE: @goo
|
||||
; CHECK-SAFE: fsqrts
|
||||
; CHECK-SAFE: fdivs
|
||||
; CHECK-SAFE: blr
|
||||
%x = call fast float @llvm.sqrt.f32(float %b)
|
||||
%r = fdiv fast float %a, %x
|
||||
ret float %r
|
||||
}
|
||||
|
||||
|
||||
define float @no_estimate_refinement_f32(float %a, float %b) #0 {
|
||||
define float @goo_safe(float %a, float %b) nounwind {
|
||||
; CHECK: @goo_safe
|
||||
; CHECK: fsqrts
|
||||
; CHECK: fdivs
|
||||
; CHECK: blr
|
||||
%x = call float @llvm.sqrt.f32(float %b)
|
||||
%r = fdiv float %a, %x
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @no_estimate_refinement_f32(float %a, float %b) #0 {
|
||||
; CHECK-LABEL: @no_estimate_refinement_f32
|
||||
; CHECK: frsqrtes
|
||||
; CHECK-NOT: fmadds
|
||||
; CHECK: fmuls
|
||||
; CHECK-NOT: fmadds
|
||||
; CHECK: blr
|
||||
%x = call fast float @llvm.sqrt.f32(float %b)
|
||||
%r = fdiv fast float %a, %x
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; Recognize that this is rsqrt(a) * rcp(b) * c,
|
||||
; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
|
||||
define float @rsqrt_fmul(float %a, float %b, float %c) {
|
||||
%x = call float @llvm.sqrt.f32(float %a)
|
||||
%y = fmul float %x, %b
|
||||
%z = fdiv float %c, %y
|
||||
ret float %z
|
||||
|
||||
; CHECK: @rsqrt_fmul
|
||||
define float @rsqrt_fmul_fmf(float %a, float %b, float %c) {
|
||||
; CHECK: @rsqrt_fmul_fmf
|
||||
; CHECK-DAG: frsqrtes
|
||||
; CHECK-DAG: fres
|
||||
; CHECK-DAG: fnmsubs
|
||||
|
@ -146,32 +154,45 @@ define float @rsqrt_fmul(float %a, float %b, float %c) {
|
|||
; CHECK-NEXT: fmuls
|
||||
; CHECK-NEXT: fmuls
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-SAFE: @rsqrt_fmul
|
||||
; CHECK-SAFE: fsqrts
|
||||
; CHECK-SAFE: fmuls
|
||||
; CHECK-SAFE: fdivs
|
||||
; CHECK-SAFE: blr
|
||||
%x = call fast float @llvm.sqrt.f32(float %a)
|
||||
%y = fmul fast float %x, %b
|
||||
%z = fdiv fast float %c, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; Recognize that this is rsqrt(a) * rcp(b) * c,
|
||||
; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
|
||||
define float @rsqrt_fmul_safe(float %a, float %b, float %c) {
|
||||
; CHECK: @rsqrt_fmul_safe
|
||||
; CHECK: fsqrts
|
||||
; CHECK: fmuls
|
||||
; CHECK: fdivs
|
||||
; CHECK: blr
|
||||
%x = call float @llvm.sqrt.f32(float %a)
|
||||
%y = fmul float %x, %b
|
||||
%z = fdiv float %c, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define <4 x float> @hoo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK: @hoo_fmf
|
||||
; CHECK: vrsqrtefp
|
||||
%x = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%r = fdiv fast <4 x float> %a, %x
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define <4 x float> @hoo_safe(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK: @hoo_safe
|
||||
; CHECK-NOT: vrsqrtefp
|
||||
; CHECK: blr
|
||||
%x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
|
||||
%r = fdiv <4 x float> %a, %x
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK: @hoo
|
||||
; CHECK: vrsqrtefp
|
||||
|
||||
; CHECK-SAFE: @hoo
|
||||
; CHECK-SAFE-NOT: vrsqrtefp
|
||||
; CHECK-SAFE: blr
|
||||
}
|
||||
|
||||
define double @foo2(double %a, double %b) nounwind {
|
||||
%r = fdiv double %a, %b
|
||||
ret double %r
|
||||
|
||||
; CHECK: @foo2
|
||||
define double @foo2_fmf(double %a, double %b) nounwind {
|
||||
; CHECK: @foo2_fmf
|
||||
; CHECK-DAG: fre
|
||||
; CHECK-DAG: fnmsub
|
||||
; CHECK: fmadd
|
||||
|
@ -179,45 +200,54 @@ define double @foo2(double %a, double %b) nounwind {
|
|||
; CHECK-NEXT: fmadd
|
||||
; CHECK-NEXT: fmul
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-SAFE: @foo2
|
||||
; CHECK-SAFE: fdiv
|
||||
; CHECK-SAFE: blr
|
||||
%r = fdiv fast double %a, %b
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define float @goo2(float %a, float %b) nounwind {
|
||||
%r = fdiv float %a, %b
|
||||
ret float %r
|
||||
define double @foo2_safe(double %a, double %b) nounwind {
|
||||
; CHECK: @foo2_safe
|
||||
; CHECK: fdiv
|
||||
; CHECK: blr
|
||||
%r = fdiv double %a, %b
|
||||
ret double %r
|
||||
}
|
||||
|
||||
; CHECK: @goo2
|
||||
define float @goo2_fmf(float %a, float %b) nounwind {
|
||||
; CHECK: @goo2_fmf
|
||||
; CHECK-DAG: fres
|
||||
; CHECK-DAG: fnmsubs
|
||||
; CHECK: fmadds
|
||||
; CHECK-NEXT: fmuls
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-SAFE: @goo2
|
||||
; CHECK-SAFE: fdivs
|
||||
; CHECK-SAFE: blr
|
||||
%r = fdiv fast float %a, %b
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
define float @goo2_safe(float %a, float %b) nounwind {
|
||||
; CHECK: @goo2_safe
|
||||
; CHECK: fdivs
|
||||
; CHECK: blr
|
||||
%r = fdiv float %a, %b
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK: @hoo2_fmf
|
||||
; CHECK: vrefp
|
||||
%r = fdiv fast <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define <4 x float> @hoo2_safe(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
; CHECK: @hoo2_safe
|
||||
; CHECK-NOT: vrefp
|
||||
; CHECK: blr
|
||||
%r = fdiv <4 x float> %a, %b
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK: @hoo2
|
||||
; CHECK: vrefp
|
||||
|
||||
; CHECK-SAFE: @hoo2
|
||||
; CHECK-SAFE-NOT: vrefp
|
||||
; CHECK-SAFE: blr
|
||||
}
|
||||
|
||||
define double @foo3(double %a) nounwind {
|
||||
%r = call double @llvm.sqrt.f64(double %a)
|
||||
ret double %r
|
||||
|
||||
; CHECK: @foo3
|
||||
define double @foo3_fmf(double %a) nounwind {
|
||||
; CHECK: @foo3_fmf
|
||||
; CHECK: fcmpu
|
||||
; CHECK-DAG: frsqrte
|
||||
; CHECK: fmul
|
||||
|
@ -229,17 +259,20 @@ define double @foo3(double %a) nounwind {
|
|||
; CHECK-NEXT: fmul
|
||||
; CHECK-NEXT: fmul
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE: @foo3
|
||||
; CHECK-SAFE: fsqrt
|
||||
; CHECK-SAFE: blr
|
||||
%r = call fast double @llvm.sqrt.f64(double %a)
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define float @goo3(float %a) nounwind {
|
||||
%r = call float @llvm.sqrt.f32(float %a)
|
||||
ret float %r
|
||||
define double @foo3_safe(double %a) nounwind {
|
||||
; CHECK: @foo3_safe
|
||||
; CHECK: fsqrt
|
||||
; CHECK: blr
|
||||
%r = call double @llvm.sqrt.f64(double %a)
|
||||
ret double %r
|
||||
}
|
||||
|
||||
; CHECK: @goo3
|
||||
define float @goo3_fmf(float %a) nounwind {
|
||||
; CHECK: @goo3_fmf
|
||||
; CHECK: fcmpu
|
||||
; CHECK-DAG: frsqrtes
|
||||
; CHECK: fmuls
|
||||
|
@ -247,24 +280,32 @@ define float @goo3(float %a) nounwind {
|
|||
; CHECK-NEXT: fmuls
|
||||
; CHECK-NEXT: fmuls
|
||||
; CHECK: blr
|
||||
|
||||
; CHECK-SAFE: @goo3
|
||||
; CHECK-SAFE: fsqrts
|
||||
; CHECK-SAFE: blr
|
||||
%r = call fast float @llvm.sqrt.f32(float %a)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define <4 x float> @hoo3(<4 x float> %a) nounwind {
|
||||
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
define float @goo3_safe(float %a) nounwind {
|
||||
; CHECK: @goo3_safe
|
||||
; CHECK: fsqrts
|
||||
; CHECK: blr
|
||||
%r = call float @llvm.sqrt.f32(float %a)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; CHECK: @hoo3
|
||||
define <4 x float> @hoo3_fmf(<4 x float> %a) nounwind {
|
||||
; CHECK: @hoo3_fmf
|
||||
; CHECK: vrsqrtefp
|
||||
; CHECK-DAG: vcmpeqfp
|
||||
%r = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
; CHECK-SAFE: @hoo3
|
||||
; CHECK-SAFE-NOT: vrsqrtefp
|
||||
; CHECK-SAFE: blr
|
||||
define <4 x float> @hoo3_safe(<4 x float> %a) nounwind {
|
||||
; CHECK: @hoo3_safe
|
||||
; CHECK-NOT: vrsqrtefp
|
||||
; CHECK: blr
|
||||
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "reciprocal-estimates"="sqrtf:0,sqrtd:0" }
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
|
||||
; rdar://13126763
|
||||
|
@ -62,9 +61,9 @@ define float @test5(<4 x float> %x) {
|
|||
%splat = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%v1 = extractelement <4 x float> %splat, i32 1
|
||||
%v0 = extractelement <4 x float> %splat, i32 0
|
||||
%add1 = fadd float %v0, %v1
|
||||
%add1 = fadd reassoc nsz float %v0, %v1
|
||||
%v2 = extractelement <4 x float> %splat, i32 2
|
||||
%add2 = fadd float %v2, %add1
|
||||
%add2 = fadd reassoc nsz float %v2, %add1
|
||||
ret float %add2
|
||||
}
|
||||
|
||||
|
|
|
@ -76,7 +76,7 @@ define <4 x float> @constant_fold_fmul_v4f32_undef(<4 x float> %x) {
|
|||
ret <4 x float> %y
|
||||
}
|
||||
|
||||
define <4 x float> @fmul0_v4f32_nsz_nnan(<4 x float> %x) #0 {
|
||||
define <4 x float> @fmul0_v4f32_nsz_nnan(<4 x float> %x) {
|
||||
; CHECK-LABEL: fmul0_v4f32_nsz_nnan:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
|
@ -85,7 +85,7 @@ define <4 x float> @fmul0_v4f32_nsz_nnan(<4 x float> %x) #0 {
|
|||
ret <4 x float> %y
|
||||
}
|
||||
|
||||
define <4 x float> @fmul0_v4f32_undef(<4 x float> %x) #0 {
|
||||
define <4 x float> @fmul0_v4f32_undef(<4 x float> %x) {
|
||||
; CHECK-LABEL: fmul0_v4f32_undef:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
|
@ -94,23 +94,23 @@ define <4 x float> @fmul0_v4f32_undef(<4 x float> %x) #0 {
|
|||
ret <4 x float> %y
|
||||
}
|
||||
|
||||
define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) #0 {
|
||||
define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) {
|
||||
; CHECK-LABEL: fmul_c2_c4_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%y = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
|
||||
%z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
|
||||
%y = fmul fast <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
|
||||
%z = fmul fast <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
|
||||
ret <4 x float> %z
|
||||
}
|
||||
|
||||
define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
|
||||
define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) {
|
||||
; CHECK-LABEL: fmul_c3_c4_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%y = fmul <4 x float> %x, <float 3.0, float 3.0, float 3.0, float 3.0>
|
||||
%z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
|
||||
%y = fmul fast <4 x float> %x, <float 3.0, float 3.0, float 3.0, float 3.0>
|
||||
%z = fmul fast <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
|
||||
ret <4 x float> %z
|
||||
}
|
||||
|
||||
|
@ -120,24 +120,24 @@ define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
|
|||
; CHECK: float 32
|
||||
|
||||
; We should be able to pre-multiply the two constant vectors.
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 {
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) {
|
||||
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
|
||||
%z = fmul <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
%y = fmul fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
|
||||
%z = fmul fast <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
ret <4 x float> %z
|
||||
}
|
||||
|
||||
; Same as above, but reverse operands to make sure non-canonical form is also handled.
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) #0 {
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) {
|
||||
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%y = fmul <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
||||
%z = fmul <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>, %y
|
||||
%y = fmul fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
||||
%z = fmul fast <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>, %y
|
||||
ret <4 x float> %z
|
||||
}
|
||||
|
||||
|
@ -172,14 +172,14 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat_reassoc_2(<4 x float> %x) {
|
|||
|
||||
; More than one use of a constant multiply should not inhibit the optimization.
|
||||
; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) {
|
||||
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
|
||||
%z = fmul <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
%a = fadd <4 x float> %y, %z
|
||||
%y = fmul fast <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
|
||||
%z = fmul fast <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
%a = fadd fast <4 x float> %y, %z
|
||||
ret <4 x float> %a
|
||||
}
|
||||
|
||||
|
@ -191,7 +191,7 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x)
|
|||
; CHECK: float 24
|
||||
; CHECK: float 24
|
||||
|
||||
define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
|
||||
define <4 x float> @PR22698_splats(<4 x float> %a) {
|
||||
; CHECK-LABEL: PR22698_splats:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
|
@ -209,7 +209,7 @@ define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
|
|||
; CHECK: float 231
|
||||
; CHECK: float 384
|
||||
|
||||
define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
|
||||
define <4 x float> @PR22698_no_splats(<4 x float> %a) {
|
||||
; CHECK-LABEL: PR22698_no_splats:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
|
@ -220,23 +220,23 @@ define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
|
|||
ret <4 x float> %mul3
|
||||
}
|
||||
|
||||
define float @fmul_c2_c4_f32(float %x) #0 {
|
||||
define float @fmul_c2_c4_f32(float %x) {
|
||||
; CHECK-LABEL: fmul_c2_c4_f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%y = fmul float %x, 2.0
|
||||
%z = fmul float %y, 4.0
|
||||
%y = fmul fast float %x, 2.0
|
||||
%z = fmul fast float %y, 4.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define float @fmul_c3_c4_f32(float %x) #0 {
|
||||
define float @fmul_c3_c4_f32(float %x) {
|
||||
; CHECK-LABEL: fmul_c3_c4_f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%y = fmul float %x, 3.0
|
||||
%z = fmul float %y, 4.0
|
||||
%y = fmul fast float %x, 3.0
|
||||
%z = fmul fast float %y, 4.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
|
@ -261,5 +261,3 @@ define <4 x float> @fmul_fneg_fneg_v4f32(<4 x float> %x, <4 x float> %y) {
|
|||
%mul = fmul <4 x float> %x.neg, %y.neg
|
||||
ret <4 x float> %mul
|
||||
}
|
||||
|
||||
attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
|
||||
|
|
|
@ -1,106 +1,106 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math --enable-no-nans-fp-math < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s
|
||||
|
||||
define float @test1(float %a) {
|
||||
define float @test1(float %a) #0 {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fadd float %a, %a
|
||||
%r = fadd float %t1, %t1
|
||||
%t1 = fadd nnan reassoc nsz float %a, %a
|
||||
%r = fadd nnan reassoc nsz float %t1, %t1
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @test2(float %a) {
|
||||
define float @test2(float %a) #0 {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fmul float 4.0, %a
|
||||
%t2 = fadd float %a, %a
|
||||
%r = fadd float %t1, %t2
|
||||
%t1 = fmul nnan reassoc nsz float 4.0, %a
|
||||
%t2 = fadd nnan reassoc nsz float %a, %a
|
||||
%r = fadd nnan reassoc nsz float %t1, %t2
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @test3(float %a) {
|
||||
define float @test3(float %a) #0 {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fmul float %a, 4.0
|
||||
%t2 = fadd float %a, %a
|
||||
%r = fadd float %t1, %t2
|
||||
%t1 = fmul nnan reassoc nsz float %a, 4.0
|
||||
%t2 = fadd nnan reassoc nsz float %a, %a
|
||||
%r = fadd nnan reassoc nsz float %t1, %t2
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @test4(float %a) {
|
||||
define float @test4(float %a) #0 {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fadd float %a, %a
|
||||
%t2 = fmul float 4.0, %a
|
||||
%r = fadd float %t1, %t2
|
||||
%t1 = fadd nnan reassoc nsz float %a, %a
|
||||
%t2 = fmul nnan reassoc nsz float 4.0, %a
|
||||
%r = fadd nnan reassoc nsz float %t1, %t2
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @test5(float %a) {
|
||||
define float @test5(float %a) #0 {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fadd float %a, %a
|
||||
%t2 = fmul float %a, 4.0
|
||||
%r = fadd float %t1, %t2
|
||||
%t1 = fadd nnan reassoc nsz float %a, %a
|
||||
%t2 = fmul nnan reassoc nsz float %a, 4.0
|
||||
%r = fadd nnan reassoc nsz float %t1, %t2
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @test6(float %a) {
|
||||
define float @test6(float %a) #0 {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fmul float 2.0, %a
|
||||
%t2 = fadd float %a, %a
|
||||
%r = fsub float %t1, %t2
|
||||
%t1 = fmul nnan reassoc nsz float 2.0, %a
|
||||
%t2 = fadd nnan reassoc nsz float %a, %a
|
||||
%r = fsub nnan reassoc nsz float %t1, %t2
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @test7(float %a) {
|
||||
define float @test7(float %a) #0 {
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fmul float %a, 2.0
|
||||
%t2 = fadd float %a, %a
|
||||
%r = fsub float %t1, %t2
|
||||
%t1 = fmul nnan reassoc nsz float %a, 2.0
|
||||
%t2 = fadd nnan reassoc nsz float %a, %a
|
||||
%r = fsub nnan reassoc nsz float %t1, %t2
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @test8(float %a) {
|
||||
define float @test8(float %a) #0 {
|
||||
; CHECK-LABEL: test8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fmul float %a, 0.0
|
||||
%t2 = fadd float %a, %t1
|
||||
%t1 = fmul nsz float %a, 0.0
|
||||
%t2 = fadd nnan reassoc nsz float %a, %t1
|
||||
ret float %t2
|
||||
}
|
||||
|
||||
define float @test9(float %a) {
|
||||
define float @test9(float %a) #0 {
|
||||
; CHECK-LABEL: test9:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fmul float 0.0, %a
|
||||
%t2 = fadd float %t1, %a
|
||||
%t1 = fmul nsz float 0.0, %a
|
||||
%t2 = fadd nnan reassoc nsz float %t1, %a
|
||||
ret float %t2
|
||||
}
|
||||
|
||||
define float @test10(float %a) {
|
||||
define float @test10(float %a) #0 {
|
||||
; CHECK-LABEL: test10:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%t1 = fsub float -0.0, %a
|
||||
%t2 = fadd float %a, %t1
|
||||
%t1 = fsub nsz float -0.0, %a
|
||||
%t2 = fadd nnan reassoc nsz float %a, %t1
|
||||
ret float %t2
|
||||
}
|
||||
|
||||
|
|
|
@ -1,98 +1,92 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=ANY,STRICT
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math | FileCheck %s --check-prefixes=ANY,UNSAFE
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
|
||||
|
||||
define float @fadd_zero(float %x) {
|
||||
; STRICT-LABEL: fadd_zero:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: xorps %xmm1, %xmm1
|
||||
; STRICT-NEXT: addss %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; UNSAFE-LABEL: fadd_zero:
|
||||
; UNSAFE: # %bb.0:
|
||||
; UNSAFE-NEXT: retq
|
||||
define float @fadd_zero_strict(float %x) {
|
||||
; CHECK-LABEL: fadd_zero_strict:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: addss %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = fadd float %x, 0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fadd_negzero(float %x) {
|
||||
; ANY-LABEL: fadd_negzero:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fadd_negzero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%r = fadd float %x, -0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fadd_produce_zero(float %x) {
|
||||
; ANY-LABEL: fadd_produce_zero:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps %xmm0, %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fadd_produce_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%neg = fsub nsz float 0.0, %x
|
||||
%r = fadd nnan float %neg, %x
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fadd_reassociate(float %x) {
|
||||
; ANY-LABEL: fadd_reassociate:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: addss {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fadd_reassociate:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addss {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%sum = fadd float %x, 8.0
|
||||
%r = fadd reassoc nsz float %sum, 12.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fadd_negzero_nsz(float %x) {
|
||||
; ANY-LABEL: fadd_negzero_nsz:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fadd_negzero_nsz:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%r = fadd nsz float %x, -0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fadd_zero_nsz(float %x) {
|
||||
; ANY-LABEL: fadd_zero_nsz:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fadd_zero_nsz:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%r = fadd nsz float %x, 0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fsub_zero(float %x) {
|
||||
; ANY-LABEL: fsub_zero:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%r = fsub float %x, 0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fsub_self(float %x) {
|
||||
; ANY-LABEL: fsub_self:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps %xmm0, %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_self:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = fsub nnan float %x, %x
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fsub_neg_x_y(float %x, float %y) {
|
||||
; ANY-LABEL: fsub_neg_x_y:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: subss %xmm0, %xmm1
|
||||
; ANY-NEXT: movaps %xmm1, %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_neg_x_y:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: subss %xmm0, %xmm1
|
||||
; CHECK-NEXT: movaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%neg = fsub nsz float 0.0, %x
|
||||
%r = fadd nsz float %neg, %y
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fsub_neg_y(float %x, float %y) {
|
||||
; ANY-LABEL: fsub_neg_y:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_neg_y:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mul = fmul float %x, 5.0
|
||||
%add = fadd float %mul, %y
|
||||
%r = fsub nsz reassoc float %y, %add
|
||||
|
@ -100,10 +94,10 @@ define float @fsub_neg_y(float %x, float %y) {
|
|||
}
|
||||
|
||||
define <4 x float> @fsub_neg_y_vector(<4 x float> %x, <4 x float> %y) {
|
||||
; ANY-LABEL: fsub_neg_y_vector:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_neg_y_vector:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>
|
||||
%add = fadd <4 x float> %mul, %y
|
||||
%r = fsub nsz reassoc <4 x float> %y, %add
|
||||
|
@ -111,10 +105,10 @@ define <4 x float> @fsub_neg_y_vector(<4 x float> %x, <4 x float> %y) {
|
|||
}
|
||||
|
||||
define <4 x float> @fsub_neg_y_vector_nonuniform(<4 x float> %x, <4 x float> %y) {
|
||||
; ANY-LABEL: fsub_neg_y_vector_nonuniform:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_neg_y_vector_nonuniform:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mul = fmul <4 x float> %x, <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
%add = fadd <4 x float> %mul, %y
|
||||
%r = fsub nsz reassoc <4 x float> %y, %add
|
||||
|
@ -122,10 +116,10 @@ define <4 x float> @fsub_neg_y_vector_nonuniform(<4 x float> %x, <4 x float> %y)
|
|||
}
|
||||
|
||||
define float @fsub_neg_y_commute(float %x, float %y) {
|
||||
; ANY-LABEL: fsub_neg_y_commute:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_neg_y_commute:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mul = fmul float %x, 5.0
|
||||
%add = fadd float %y, %mul
|
||||
%r = fsub nsz reassoc float %y, %add
|
||||
|
@ -133,10 +127,10 @@ define float @fsub_neg_y_commute(float %x, float %y) {
|
|||
}
|
||||
|
||||
define <4 x float> @fsub_neg_y_commute_vector(<4 x float> %x, <4 x float> %y) {
|
||||
; ANY-LABEL: fsub_neg_y_commute_vector:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_neg_y_commute_vector:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>
|
||||
%add = fadd <4 x float> %y, %mul
|
||||
%r = fsub nsz reassoc <4 x float> %y, %add
|
||||
|
@ -146,10 +140,10 @@ define <4 x float> @fsub_neg_y_commute_vector(<4 x float> %x, <4 x float> %y) {
|
|||
; Y - (X + Y) --> -X
|
||||
|
||||
define float @fsub_fadd_common_op_fneg(float %x, float %y) {
|
||||
; ANY-LABEL: fsub_fadd_common_op_fneg:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_fadd_common_op_fneg:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%a = fadd float %x, %y
|
||||
%r = fsub reassoc nsz float %y, %a
|
||||
ret float %r
|
||||
|
@ -158,10 +152,10 @@ define float @fsub_fadd_common_op_fneg(float %x, float %y) {
|
|||
; Y - (X + Y) --> -X
|
||||
|
||||
define <4 x float> @fsub_fadd_common_op_fneg_vec(<4 x float> %x, <4 x float> %y) {
|
||||
; ANY-LABEL: fsub_fadd_common_op_fneg_vec:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_fadd_common_op_fneg_vec:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%a = fadd <4 x float> %x, %y
|
||||
%r = fsub nsz reassoc <4 x float> %y, %a
|
||||
ret <4 x float> %r
|
||||
|
@ -171,10 +165,10 @@ define <4 x float> @fsub_fadd_common_op_fneg_vec(<4 x float> %x, <4 x float> %y)
|
|||
; Commute operands of the 'add'.
|
||||
|
||||
define float @fsub_fadd_common_op_fneg_commute(float %x, float %y) {
|
||||
; ANY-LABEL: fsub_fadd_common_op_fneg_commute:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_fadd_common_op_fneg_commute:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%a = fadd float %y, %x
|
||||
%r = fsub reassoc nsz float %y, %a
|
||||
ret float %r
|
||||
|
@ -183,90 +177,90 @@ define float @fsub_fadd_common_op_fneg_commute(float %x, float %y) {
|
|||
; Y - (Y + X) --> -X
|
||||
|
||||
define <4 x float> @fsub_fadd_common_op_fneg_commute_vec(<4 x float> %x, <4 x float> %y) {
|
||||
; ANY-LABEL: fsub_fadd_common_op_fneg_commute_vec:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_fadd_common_op_fneg_commute_vec:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%a = fadd <4 x float> %y, %x
|
||||
%r = fsub reassoc nsz <4 x float> %y, %a
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define float @fsub_negzero(float %x) {
|
||||
; STRICT-LABEL: fsub_negzero:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: xorps %xmm1, %xmm1
|
||||
; STRICT-NEXT: addss %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; UNSAFE-LABEL: fsub_negzero:
|
||||
; UNSAFE: # %bb.0:
|
||||
; UNSAFE-NEXT: retq
|
||||
define float @fsub_negzero_strict(float %x) {
|
||||
; CHECK-LABEL: fsub_negzero_strict:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: addss %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = fsub float %x, -0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define <4 x float> @fsub_negzero_vector(<4 x float> %x) {
|
||||
; STRICT-LABEL: fsub_negzero_vector:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: xorps %xmm1, %xmm1
|
||||
; STRICT-NEXT: addps %xmm1, %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; UNSAFE-LABEL: fsub_negzero_vector:
|
||||
; UNSAFE: # %bb.0:
|
||||
; UNSAFE-NEXT: retq
|
||||
define float @fsub_negzero_nsz(float %x) {
|
||||
; CHECK-LABEL: fsub_negzero_nsz:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%r = fsub nsz float %x, -0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define <4 x float> @fsub_negzero_strict_vector(<4 x float> %x) {
|
||||
; CHECK-LABEL: fsub_negzero_strict_vector:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: addps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = fsub <4 x float> %x, <float -0.0, float -0.0, float -0.0, float -0.0>
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define <4 x float> @fsub_negzero_nsz_vector(<4 x float> %x) {
|
||||
; CHECK-LABEL: fsub_negzero_nsz_vector:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%r = fsub nsz <4 x float> %x, <float -0.0, float -0.0, float -0.0, float -0.0>
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
define float @fsub_zero_nsz_1(float %x) {
|
||||
; ANY-LABEL: fsub_zero_nsz_1:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_zero_nsz_1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%r = fsub nsz float %x, 0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fsub_zero_nsz_2(float %x) {
|
||||
; ANY-LABEL: fsub_zero_nsz_2:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fsub_zero_nsz_2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = fsub nsz float 0.0, %x
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fsub_negzero_nsz(float %x) {
|
||||
; ANY-LABEL: fsub_negzero_nsz:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: retq
|
||||
%r = fsub nsz float %x, -0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fmul_zero(float %x) {
|
||||
; ANY-LABEL: fmul_zero:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps %xmm0, %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fmul_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xorps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%r = fmul nnan nsz float %x, 0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fmul_one(float %x) {
|
||||
; ANY-LABEL: fmul_one:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fmul_one:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: retq
|
||||
%r = fmul float %x, 1.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fmul_x_const_const(float %x) {
|
||||
; ANY-LABEL: fmul_x_const_const:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
; CHECK-LABEL: fmul_x_const_const:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%mul = fmul reassoc float %x, 9.0
|
||||
%r = fmul reassoc float %mul, 4.0
|
||||
ret float %r
|
||||
|
|
Loading…
Reference in New Issue