[DAGCombiner] skip reciprocal divisor optimization for x/sqrt(x), better

I tried to fix this in:
rG716e35a0cf53
...but that patch depends on the order that we encounter the
magic "x/sqrt(x)" expression in the combiner's worklist.

This patch should improve that by waiting until we walk the
user list to decide if there's a use to skip.

The AArch64 test reveals another (existing) ordering problem
though - we may try to create an estimate for plain sqrt(x)
before we see that it is part of a 1/sqrt(x) expression.
This commit is contained in:
Sanjay Patel 2020-08-31 09:31:15 -04:00
parent 11e0c5b648
commit 1c9a09f42e
3 changed files with 37 additions and 35 deletions

View File

@ -13235,11 +13235,6 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
if (N1.getOpcode() == ISD::FSQRT && N1.getOperand(0) == N0 &&
Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
unsigned MinUses = TLI.combineRepeatedFPDivisors();
@ -13259,6 +13254,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
SetVector<SDNode *> Users;
for (auto *U : N1->uses()) {
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
// Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
U->getOperand(0) == U->getOperand(1).getOperand(0) &&
U->getFlags().hasAllowReassociation() &&
U->getFlags().hasNoSignedZeros())
continue;
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
if (UnsafeMath || U->getFlags().hasAllowReciprocal())
@ -13470,6 +13472,10 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
// transform the fdiv, we may produce a sub-optimal estimate sequence
// because the reciprocal calculation may not have to filter out a
// 0.0 input.
return buildSqrtEstimate(N0, Flags);
}

View File

@ -570,19 +570,16 @@ define double @sqrt_simplify_before_recip_3_uses(double %x, double* %p1, double*
define double @sqrt_simplify_before_recip_3_uses_order(double %x, double* %p1, double* %p2) nounwind {
; FAULT-LABEL: sqrt_simplify_before_recip_3_uses_order:
; FAULT: // %bb.0:
; FAULT-NEXT: mov x9, #140737488355328
; FAULT-NEXT: mov x8, #4631107791820423168
; FAULT-NEXT: fmov d3, x8
; FAULT-NEXT: mov x8, #140737488355328
; FAULT-NEXT: fsqrt d1, d0
; FAULT-NEXT: fmov d2, #1.00000000
; FAULT-NEXT: movk x8, #16453, lsl #48
; FAULT-NEXT: fdiv d1, d2, d1
; FAULT-NEXT: fmov d2, x8
; FAULT-NEXT: fmul d0, d0, d1
; FAULT-NEXT: fmul d3, d1, d3
; FAULT-NEXT: fmul d1, d1, d2
; FAULT-NEXT: str d3, [x0]
; FAULT-NEXT: str d1, [x1]
; FAULT-NEXT: movk x9, #16453, lsl #48
; FAULT-NEXT: fsqrt d0, d0
; FAULT-NEXT: fmov d1, x8
; FAULT-NEXT: fmov d2, x9
; FAULT-NEXT: fdiv d1, d1, d0
; FAULT-NEXT: fdiv d2, d2, d0
; FAULT-NEXT: str d1, [x0]
; FAULT-NEXT: str d2, [x1]
; FAULT-NEXT: ret
;
; CHECK-LABEL: sqrt_simplify_before_recip_3_uses_order:
@ -644,21 +641,24 @@ define double @sqrt_simplify_before_recip_4_uses(double %x, double* %p1, double*
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: frsqrts d3, d0, d3
; CHECK-NEXT: mov x8, #4631107791820423168
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: mov x8, #4631107791820423168
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: mov x8, #140737488355328
; CHECK-NEXT: fmul d3, d1, d1
; CHECK-NEXT: movk x8, #16453, lsl #48
; CHECK-NEXT: frsqrts d3, d0, d3
; CHECK-NEXT: movk x8, #16453, lsl #48
; CHECK-NEXT: fmul d1, d1, d3
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: fmov d4, x8
; CHECK-NEXT: fmul d3, d0, d1
; CHECK-NEXT: fmul d2, d1, d2
; CHECK-NEXT: fmul d3, d1, d3
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fmul d4, d1, d4
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: fcsel d1, d0, d3, eq
; CHECK-NEXT: fdiv d0, d0, d1
; CHECK-NEXT: str d2, [x1]
; CHECK-NEXT: str d3, [x2]
; CHECK-NEXT: str d4, [x2]
; CHECK-NEXT: ret
%sqrt = tail call fast double @llvm.sqrt.f64(double %x)
%rsqrt = fdiv fast double 1.0, %sqrt

View File

@ -997,21 +997,17 @@ define <2 x double> @sqrt_simplify_before_recip_vec(<2 x double> %x, <2 x double
define double @sqrt_simplify_before_recip_order(double %x, double* %p) nounwind {
; SSE-LABEL: sqrt_simplify_before_recip_order:
; SSE: # %bb.0:
; SSE-NEXT: sqrtsd %xmm0, %xmm1
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: divsd %xmm1, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: mulsd {{.*}}(%rip), %xmm2
; SSE-NEXT: movsd %xmm2, (%rdi)
; SSE-NEXT: sqrtsd %xmm0, %xmm0
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movsd %xmm1, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: sqrt_simplify_before_recip_order:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1
; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX-NEXT: vdivsd %xmm1, %xmm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm1
; AVX-NEXT: vmovsd %xmm1, (%rdi)
; AVX-NEXT: retq
%sqrt = tail call fast double @llvm.sqrt.f64(double %x)