forked from OSchip/llvm-project
[X86] Pre-commit tests to show the problem of SQRT when `RefinementSteps` = 0. NFC
This commit is contained in:
parent
62fea88bc5
commit
65a3de91ab
|
@@ -120,6 +120,20 @@ define half @test_sqrt_sh2(half %a0, half %a1) {
|
|||
ret half %2
|
||||
}
|
||||
|
||||
; test_sqrt_sh3 - scalar half sqrt with the `fast` flag, lowered via the
; reciprocal square-root estimate instruction (vrsqrtsh). %a1 is unused.
;
; The expected assembly records the current output: the input is ANDed with a
; broadcast constant (printed as NaN; presumably the sign-clearing mask), the
; result is compared "less than" against a constant-pool value to form %k1, and
; the vrsqrtsh result is overwritten with zero in lanes selected by %k1.
; There is no multiply by the input, so the value returned is the rsqrt
; estimate rather than a square root.
; NOTE(review): this appears to be the problem named in the commit title
; (sqrt with zero refinement steps), captured as-is so a later fix shows up
; as a diff in these lines - confirm against the follow-up change.
define half @test_sqrt_sh3(half %a0, half %a1) {
|
||||
; CHECK-LABEL: test_sqrt_sh3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
|
||||
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm1
|
||||
; CHECK-NEXT: vcmpltsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
|
||||
; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
|
||||
; CHECK-NEXT: retq
|
||||
%1 = call fast half @llvm.sqrt.f16(half %a0)
|
||||
ret half %1
|
||||
}
|
||||
|
||||
declare half @llvm.sqrt.f16(half)
|
||||
|
||||
define <8 x half> @test_sqrt_sh_r(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
|
||||
|
|
|
@@ -969,6 +969,18 @@ define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) {
|
|||
ret <8 x half> %2
|
||||
}
|
||||
|
||||
; test_sqrt_ph_128_fast2 - <8 x half> sqrt with the `fast` flag, lowered via
; the packed reciprocal square-root estimate (vrsqrtph). %a1 is unused.
;
; The expected assembly records the current output: the input is ANDed with a
; broadcast constant (printed as NaN; presumably the sign-clearing mask), a
; "greater or equal" compare against a broadcast constant-pool value forms
; %k1, and vrsqrtph is issued zero-masked by %k1. The masked rsqrt estimate is
; returned directly; no multiply by the input follows.
; NOTE(review): this appears to be the problem named in the commit title
; (sqrt with zero refinement steps), captured as-is so a later fix shows up
; as a diff in these lines - confirm against the follow-up change.
define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
|
||||
; CHECK-LABEL: test_sqrt_ph_128_fast2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
|
||||
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm1
|
||||
; CHECK-NEXT: vcmpgeph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %xmm1, %k1
|
||||
; CHECK-NEXT: vrsqrtph %xmm0, %xmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
|
||||
ret <8 x half> %1
|
||||
}
|
||||
|
||||
define <8 x half> @test_mask_sqrt_ph_128(<8 x half> %a0, <8 x half> %passthru, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_sqrt_ph_128:
|
||||
; CHECK: # %bb.0:
|
||||
|
|
|
@@ -384,6 +384,37 @@ define float @f32_estimate(float %x) #1 {
|
|||
ret float %div
|
||||
}
|
||||
|
||||
; f32_estimate2 - scalar float sqrt with the `fast` flag, compiled under
; attribute group #5 ("reciprocal-estimates"="all:0").
;
; All three expected sequences (SSE, AVX1, AVX512) record the current output
; and have the same shape: an rsqrtss/vrsqrtss estimate of the input, an AND
; of the input with a constant (presumably the sign-clearing mask), a
; "less than" compare against a constant-pool value, and a final step that
; zeroes the estimate where the compare is true (andnps / vandnps / masked
; vmovss of zero). None of them multiplies the estimate by the input, so the
; value returned is the rsqrt estimate rather than a square root.
; NOTE(review): this appears to be the problem named in the commit title
; (sqrt with zero refinement steps), captured as-is so a later fix shows up
; as a diff in these lines - confirm against the follow-up change.
define float @f32_estimate2(float %x) #5 {
|
||||
; SSE-LABEL: f32_estimate2:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: rsqrtss %xmm0, %xmm1
|
||||
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: cmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: andnps %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: f32_estimate2:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vandnps %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: f32_estimate2:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
|
||||
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
|
||||
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%sqrt = tail call fast float @llvm.sqrt.f32(float %x)
|
||||
ret float %sqrt
|
||||
}
|
||||
|
||||
define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
||||
; SSE-LABEL: v4f32_no_estimate:
|
||||
; SSE: # %bb.0:
|
||||
|
@@ -446,6 +477,39 @@ define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
|
|||
ret <4 x float> %div
|
||||
}
|
||||
|
||||
; v4f32_estimate2 - <4 x float> sqrt with the `fast` flag, compiled under
; attribute group #5 ("reciprocal-estimates"="all:0").
;
; All three expected sequences (SSE, AVX1, AVX512) record the current output
; and have the same shape: the input is ANDed with a constant (presumably the
; sign-clearing mask), the splat constant 1.17549435E-38 is compared
; "less or equal" against that value to build a lane mask, and the
; rsqrtps/vrsqrtps estimate is ANDed with the mask. None of them multiplies
; the estimate by the input, so the value returned is the masked rsqrt
; estimate rather than a square root.
; NOTE(review): this appears to be the problem named in the commit title
; (sqrt with zero refinement steps), captured as-is so a later fix shows up
; as a diff in these lines - confirm against the follow-up change.
define <4 x float> @v4f32_estimate2(<4 x float> %x) #5 {
|
||||
; SSE-LABEL: v4f32_estimate2:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: rsqrtps %xmm0, %xmm2
|
||||
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
|
||||
; SSE-NEXT: cmpleps %xmm0, %xmm1
|
||||
; SSE-NEXT: andps %xmm2, %xmm1
|
||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: v4f32_estimate2:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
|
||||
; AVX1-NEXT: vcmpleps %xmm1, %xmm2, %xmm1
|
||||
; AVX1-NEXT: vrsqrtps %xmm0, %xmm0
|
||||
; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4f32_estimate2:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
|
||||
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
|
||||
; AVX512-NEXT: vcmpleps %xmm1, %xmm2, %xmm1
|
||||
; AVX512-NEXT: vrsqrtps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vandps %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%sqrt = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
|
||||
ret <4 x float> %sqrt
|
||||
}
|
||||
|
||||
define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||
; SSE-LABEL: v8f32_no_estimate:
|
||||
; SSE: # %bb.0:
|
||||
|
@@ -1020,3 +1084,4 @@ attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt"
|
|||
attributes #2 = { nounwind readnone }
|
||||
attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee" }
|
||||
attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }
|
||||
; #5 - unsafe FP math with reciprocal estimates requested via "all:0"
; (presumably: every operation enabled, zero refinement steps - the
; configuration named in the commit title). Used by f32_estimate2 and
; v4f32_estimate2.
attributes #5 = { "unsafe-fp-math"="true" "reciprocal-estimates"="all:0" }
|
||||
|
|
Loading…
Reference in New Issue