[X86][FP16] Only generate approximate rsqrt when Reciprocal is true for half type

We have a reasonably fast sqrt and an accurate rsqrt for the half type due to
its limited number of fraction bits. So we neither need multi-step refinement
for rsqrt nor need to replace sqrt with rsqrt.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D114844
This commit is contained in:
Phoebe Wang 2021-12-02 13:11:07 +08:00
parent 4756a2f157
commit f13b43d570
3 changed files with 7 additions and 10 deletions

View File

@ -23190,6 +23190,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// We don't need to replace SQRT with RSQRT for half type.
if (VT.getScalarType() == MVT::f16)
return true;
// We never want to use both SQRT and RSQRT instructions for the same input.
if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
return false;
@ -23236,6 +23240,7 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
Subtarget.hasFP16()) {
assert(Reciprocal && "Don't replace SQRT with RSQRT for half type");
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 0;

View File

@ -123,12 +123,7 @@ define half @test_sqrt_sh2(half %a0, half %a1) {
define half @test_sqrt_sh3(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm1
; CHECK-NEXT: vcmpltsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
; CHECK-NEXT: vsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = call fast half @llvm.sqrt.f16(half %a0)
ret half %1

View File

@ -972,10 +972,7 @@ define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) {
define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_128_fast2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm1
; CHECK-NEXT: vcmpgeph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %xmm1, %k1
; CHECK-NEXT: vrsqrtph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vsqrtph %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
ret <8 x half> %1