forked from OSchip/llvm-project
[DAGCombiner] skip reciprocal divisor optimization for x/sqrt(x), better
I tried to fix this in: rG716e35a0cf53 ...but that patch depends on the order that we encounter the magic "x/sqrt(x)" expression in the combiner's worklist. This patch should improve that by waiting until we walk the user list to decide if there's a use to skip. The AArch64 test reveals another (existing) ordering problem though - we may try to create an estimate for plain sqrt(x) before we see that it is part of a 1/sqrt(x) expression.
This commit is contained in:
parent
11e0c5b648
commit
1c9a09f42e
|
@ -13235,11 +13235,6 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
|
|||
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
|
||||
return SDValue();
|
||||
|
||||
// Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
|
||||
if (N1.getOpcode() == ISD::FSQRT && N1.getOperand(0) == N0 &&
|
||||
Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())
|
||||
return SDValue();
|
||||
|
||||
// Exit early if the target does not want this transform or if there can't
|
||||
// possibly be enough uses of the divisor to make the transform worthwhile.
|
||||
unsigned MinUses = TLI.combineRepeatedFPDivisors();
|
||||
|
@ -13259,6 +13254,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
|
|||
SetVector<SDNode *> Users;
|
||||
for (auto *U : N1->uses()) {
|
||||
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
|
||||
// Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
|
||||
if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
|
||||
U->getOperand(0) == U->getOperand(1).getOperand(0) &&
|
||||
U->getFlags().hasAllowReassociation() &&
|
||||
U->getFlags().hasNoSignedZeros())
|
||||
continue;
|
||||
|
||||
// This division is eligible for optimization only if global unsafe math
|
||||
// is enabled or if this division allows reciprocal formation.
|
||||
if (UnsafeMath || U->getFlags().hasAllowReciprocal())
|
||||
|
@ -13470,6 +13472,10 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
|
|||
return SDValue();
|
||||
|
||||
// FSQRT nodes have flags that propagate to the created nodes.
|
||||
// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
|
||||
// transform the fdiv, we may produce a sub-optimal estimate sequence
|
||||
// because the reciprocal calculation may not have to filter out a
|
||||
// 0.0 input.
|
||||
return buildSqrtEstimate(N0, Flags);
|
||||
}
|
||||
|
||||
|
|
|
@ -570,19 +570,16 @@ define double @sqrt_simplify_before_recip_3_uses(double %x, double* %p1, double*
|
|||
define double @sqrt_simplify_before_recip_3_uses_order(double %x, double* %p1, double* %p2) nounwind {
|
||||
; FAULT-LABEL: sqrt_simplify_before_recip_3_uses_order:
|
||||
; FAULT: // %bb.0:
|
||||
; FAULT-NEXT: mov x9, #140737488355328
|
||||
; FAULT-NEXT: mov x8, #4631107791820423168
|
||||
; FAULT-NEXT: fmov d3, x8
|
||||
; FAULT-NEXT: mov x8, #140737488355328
|
||||
; FAULT-NEXT: fsqrt d1, d0
|
||||
; FAULT-NEXT: fmov d2, #1.00000000
|
||||
; FAULT-NEXT: movk x8, #16453, lsl #48
|
||||
; FAULT-NEXT: fdiv d1, d2, d1
|
||||
; FAULT-NEXT: fmov d2, x8
|
||||
; FAULT-NEXT: fmul d0, d0, d1
|
||||
; FAULT-NEXT: fmul d3, d1, d3
|
||||
; FAULT-NEXT: fmul d1, d1, d2
|
||||
; FAULT-NEXT: str d3, [x0]
|
||||
; FAULT-NEXT: str d1, [x1]
|
||||
; FAULT-NEXT: movk x9, #16453, lsl #48
|
||||
; FAULT-NEXT: fsqrt d0, d0
|
||||
; FAULT-NEXT: fmov d1, x8
|
||||
; FAULT-NEXT: fmov d2, x9
|
||||
; FAULT-NEXT: fdiv d1, d1, d0
|
||||
; FAULT-NEXT: fdiv d2, d2, d0
|
||||
; FAULT-NEXT: str d1, [x0]
|
||||
; FAULT-NEXT: str d2, [x1]
|
||||
; FAULT-NEXT: ret
|
||||
;
|
||||
; CHECK-LABEL: sqrt_simplify_before_recip_3_uses_order:
|
||||
|
@ -644,21 +641,24 @@ define double @sqrt_simplify_before_recip_4_uses(double %x, double* %p1, double*
|
|||
; CHECK-NEXT: fmul d1, d1, d3
|
||||
; CHECK-NEXT: fmul d3, d1, d1
|
||||
; CHECK-NEXT: frsqrts d3, d0, d3
|
||||
; CHECK-NEXT: mov x8, #4631107791820423168
|
||||
; CHECK-NEXT: fmul d1, d1, d3
|
||||
; CHECK-NEXT: mov x8, #4631107791820423168
|
||||
; CHECK-NEXT: fmul d3, d1, d1
|
||||
; CHECK-NEXT: fmov d2, x8
|
||||
; CHECK-NEXT: mov x8, #140737488355328
|
||||
; CHECK-NEXT: fmul d3, d1, d1
|
||||
; CHECK-NEXT: movk x8, #16453, lsl #48
|
||||
; CHECK-NEXT: frsqrts d3, d0, d3
|
||||
; CHECK-NEXT: movk x8, #16453, lsl #48
|
||||
; CHECK-NEXT: fmul d1, d1, d3
|
||||
; CHECK-NEXT: fmov d3, x8
|
||||
; CHECK-NEXT: fcmp d0, #0.0
|
||||
; CHECK-NEXT: fmov d4, x8
|
||||
; CHECK-NEXT: fmul d3, d0, d1
|
||||
; CHECK-NEXT: fmul d2, d1, d2
|
||||
; CHECK-NEXT: fmul d3, d1, d3
|
||||
; CHECK-NEXT: fmul d0, d0, d1
|
||||
; CHECK-NEXT: fmul d4, d1, d4
|
||||
; CHECK-NEXT: str d1, [x0]
|
||||
; CHECK-NEXT: fcsel d1, d0, d3, eq
|
||||
; CHECK-NEXT: fdiv d0, d0, d1
|
||||
; CHECK-NEXT: str d2, [x1]
|
||||
; CHECK-NEXT: str d3, [x2]
|
||||
; CHECK-NEXT: str d4, [x2]
|
||||
; CHECK-NEXT: ret
|
||||
%sqrt = tail call fast double @llvm.sqrt.f64(double %x)
|
||||
%rsqrt = fdiv fast double 1.0, %sqrt
|
||||
|
|
|
@ -997,21 +997,17 @@ define <2 x double> @sqrt_simplify_before_recip_vec(<2 x double> %x, <2 x double
|
|||
define double @sqrt_simplify_before_recip_order(double %x, double* %p) nounwind {
|
||||
; SSE-LABEL: sqrt_simplify_before_recip_order:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: sqrtsd %xmm0, %xmm1
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; SSE-NEXT: divsd %xmm1, %xmm2
|
||||
; SSE-NEXT: mulsd %xmm2, %xmm0
|
||||
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm2
|
||||
; SSE-NEXT: movsd %xmm2, (%rdi)
|
||||
; SSE-NEXT: sqrtsd %xmm0, %xmm0
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; SSE-NEXT: divsd %xmm0, %xmm1
|
||||
; SSE-NEXT: movsd %xmm1, (%rdi)
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: sqrt_simplify_before_recip_order:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
|
||||
; AVX-NEXT: vdivsd %xmm1, %xmm2, %xmm1
|
||||
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm1
|
||||
; AVX-NEXT: vmovsd %xmm1, (%rdi)
|
||||
; AVX-NEXT: retq
|
||||
%sqrt = tail call fast double @llvm.sqrt.f64(double %x)
|
||||
|
|
Loading…
Reference in New Issue