forked from OSchip/llvm-project
Utilize new SDNode flag functionality to expand current support for fadd
Summary: This patch originated from D46562 and is a proper subset of it, with some issues addressed.
Reviewers: spatel, hfinkel, wristow, arsenm, javed.absar
Reviewed By: spatel
Subscribers: wdng, nhaehnle
Differential Revision: https://reviews.llvm.org/D47909
llvm-svn: 334996
This commit is contained in:
parent
87f0f95e4c
commit
7b993d762f
|
@ -10327,20 +10327,21 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
|
|||
return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
|
||||
}
|
||||
|
||||
// FIXME: Auto-upgrade the target/function-level option.
|
||||
if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
|
||||
// fold (fadd A, 0) -> A
|
||||
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
|
||||
if (N1C->isZero())
|
||||
return N0;
|
||||
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
|
||||
if (N1C && N1C->isZero()) {
|
||||
if (N1C->isNegative() || Options.UnsafeFPMath ||
|
||||
Flags.hasNoSignedZeros()) {
|
||||
// fold (fadd A, 0) -> A
|
||||
return N0;
|
||||
}
|
||||
}
|
||||
|
||||
// No FP constant should be created after legalization as Instruction
|
||||
// Selection pass has a hard time dealing with FP constants.
|
||||
bool AllowNewConst = (Level < AfterLegalizeDAG);
|
||||
|
||||
// TODO: fmf test for NaNs could be done here too
|
||||
if (Options.UnsafeFPMath && AllowNewConst) {
|
||||
// If 'unsafe math' or nnan is enabled, fold lots of things.
|
||||
if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
|
||||
// If allowed, fold (fadd (fneg x), x) -> 0.0
|
||||
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
|
||||
return DAG.getConstantFP(0.0, DL, VT);
|
||||
|
@ -10350,9 +10351,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
|
|||
return DAG.getConstantFP(0.0, DL, VT);
|
||||
}
|
||||
|
||||
// If 'unsafe math' is enabled, fold lots of things.
|
||||
// TODO: fmf testing for reassoc/nsz could be done here too
|
||||
if (Options.UnsafeFPMath && AllowNewConst) {
|
||||
// If 'unsafe math' or reassoc and nsz, fold lots of things.
|
||||
// TODO: break out portions of the transformations below for which Unsafe is
|
||||
// considered and which do not require both nsz and reassoc
|
||||
if ((Options.UnsafeFPMath ||
|
||||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
|
||||
AllowNewConst) {
|
||||
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
|
||||
if (N1CFP && N0.getOpcode() == ISD::FADD &&
|
||||
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
|
||||
|
|
|
@ -4442,24 +4442,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
|||
case ISD::FMUL:
|
||||
case ISD::FDIV:
|
||||
case ISD::FREM:
|
||||
if (getTarget().Options.UnsafeFPMath) {
|
||||
if (Opcode == ISD::FADD) {
|
||||
// x+0 --> x
|
||||
if (N2CFP && N2CFP->getValueAPF().isZero())
|
||||
return N1;
|
||||
} else if (Opcode == ISD::FSUB) {
|
||||
// x-0 --> x
|
||||
if (N2CFP && N2CFP->getValueAPF().isZero())
|
||||
return N1;
|
||||
} else if (Opcode == ISD::FMUL) {
|
||||
// x*0 --> 0
|
||||
if (N2CFP && N2CFP->isZero())
|
||||
return N2;
|
||||
// x*1 --> x
|
||||
if (N2CFP && N2CFP->isExactlyValue(1.0))
|
||||
return N1;
|
||||
}
|
||||
}
|
||||
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
|
||||
assert(N1.getValueType() == N2.getValueType() &&
|
||||
N1.getValueType() == VT && "Binary operator types must match!");
|
||||
|
|
|
@ -100,11 +100,12 @@ define float @fadd_const_multiuse_fmf(float %x) {
|
|||
; CHECK-LABEL: fadd_const_multiuse_fmf:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI7_0
|
||||
; CHECK-NEXT: adrp x9, .LCPI7_1
|
||||
; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI7_0]
|
||||
; CHECK-NEXT: fadd s0, s0, s1
|
||||
; CHECK-NEXT: fmov s1, #17.00000000
|
||||
; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI7_1]
|
||||
; CHECK-NEXT: fadd s1, s0, s1
|
||||
; CHECK-NEXT: fadd s0, s0, s1
|
||||
; CHECK-NEXT: fadd s0, s0, s2
|
||||
; CHECK-NEXT: fadd s0, s1, s0
|
||||
; CHECK-NEXT: ret
|
||||
%a1 = fadd float %x, 42.0
|
||||
%a2 = fadd nsz reassoc float %a1, 17.0
|
||||
|
|
|
@ -66,7 +66,7 @@ define amdgpu_kernel void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float
|
|||
; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
|
||||
; SI-NOT: v_add_f32
|
||||
define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
|
||||
%add = fadd float %a, 0.0
|
||||
%add = fadd nsz float %a, 0.0
|
||||
store float %add, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -32,18 +32,13 @@ declare float @llvm.fmuladd.f32(float %a, float %b, float %c);
|
|||
define float @fast_fmuladd_opts(float %a , float %b , float %c) {
|
||||
; X64-LABEL: fast_fmuladd_opts:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movaps %xmm0, %xmm1
|
||||
; X64-NEXT: addss %xmm0, %xmm1
|
||||
; X64-NEXT: addss %xmm0, %xmm1
|
||||
; X64-NEXT: movaps %xmm1, %xmm0
|
||||
; X64-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: fast_fmuladd_opts:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fld %st(0)
|
||||
; X86-NEXT: fadd %st(1)
|
||||
; X86-NEXT: faddp %st(1)
|
||||
; X86-NEXT: fmuls {{.*}}
|
||||
; X86-NEXT: retl
|
||||
%res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)
|
||||
ret float %res
|
||||
|
@ -56,9 +51,9 @@ define float @fast_fmuladd_opts(float %a , float %b , float %c) {
|
|||
define double @not_so_fast_mul_add(double %x) {
|
||||
; X64-LABEL: not_so_fast_mul_add:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; X64-NEXT: movsd {{.*}}(%rip), %xmm1
|
||||
; X64-NEXT: mulsd %xmm0, %xmm1
|
||||
; X64-NEXT: addsd %xmm1, %xmm0
|
||||
; X64-NEXT: mulsd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: movsd %xmm1, {{.*}}(%rip)
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
|
@ -67,7 +62,9 @@ define double @not_so_fast_mul_add(double %x) {
|
|||
; X86-NEXT: fldl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fld %st(0)
|
||||
; X86-NEXT: fmull {{\.LCPI.*}}
|
||||
; X86-NEXT: fadd %st(0), %st(1)
|
||||
; X86-NEXT: fxch %st(1)
|
||||
; X86-NEXT: fmull {{\.LCPI.*}}
|
||||
; X86-NEXT: fxch %st(1)
|
||||
; X86-NEXT: fstpl mul1
|
||||
; X86-NEXT: retl
|
||||
%m = fmul double %x, 4.2
|
||||
|
|
|
@ -17,18 +17,33 @@ define float @fadd_zero(float %x) {
|
|||
}
|
||||
|
||||
define float @fadd_negzero(float %x) {
|
||||
; STRICT-LABEL: fadd_negzero:
|
||||
; STRICT: # %bb.0:
|
||||
; STRICT-NEXT: addss {{.*}}(%rip), %xmm0
|
||||
; STRICT-NEXT: retq
|
||||
;
|
||||
; UNSAFE-LABEL: fadd_negzero:
|
||||
; UNSAFE: # %bb.0:
|
||||
; UNSAFE-NEXT: retq
|
||||
; ANY-LABEL: fadd_negzero:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: retq
|
||||
%r = fadd float %x, -0.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fadd_produce_zero(float %x) {
|
||||
; ANY-LABEL: fadd_produce_zero:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: xorps %xmm0, %xmm0
|
||||
; ANY-NEXT: retq
|
||||
%neg = fsub nsz float 0.0, %x
|
||||
%r = fadd nnan float %neg, %x
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fadd_reassociate(float %x) {
|
||||
; ANY-LABEL: fadd_reassociate:
|
||||
; ANY: # %bb.0:
|
||||
; ANY-NEXT: addss {{.*}}(%rip), %xmm0
|
||||
; ANY-NEXT: retq
|
||||
%sum = fadd float %x, 8.0
|
||||
%r = fadd reassoc nsz float %sum, 12.0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
define float @fadd_negzero_nsz(float %x) {
|
||||
; ANY-LABEL: fadd_negzero_nsz:
|
||||
; ANY: # %bb.0:
|
||||
|
|
Loading…
Reference in New Issue