llvm-project/llvm/test/CodeGen/X86/sqrt-partial.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Two configurations are exercised: baseline x86-64 (assertions under the SSE
; prefix) and the same triple with +avx (assertions under the AVX prefix).
; No assertion in this file uses a shared prefix, so only the per-run prefix
; is passed to FileCheck.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; PR31455 - https://bugs.llvm.org/show_bug.cgi?id=31455
; We have to assume that errno can be set, so we have to make a libcall in that case.
; But it's better for perf to check that the argument is valid rather than the result of
; sqrtss/sqrtsd.
; Note: This is really a test of the -partially-inline-libcalls IR pass (and we have an IR test
; for that), but we're checking the final asm to make sure that comes out as expected too.

; Single-precision case: a tail call to the sqrtf libcall on the argument.
; In both runs the expected asm zeroes xmm1, compares the argument against it,
; and on 'below' (jb) branches to the %call.sqrt block, which tail-calls sqrtf.
; Otherwise the %.split block computes the result with the inline scalar
; square-root instruction (sqrtss / vsqrtss).
define float @f(float %val) nounwind {
; SSE-LABEL: f:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    jb .LBB0_2
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
; SSE-NEXT:  .LBB0_2: # %call.sqrt
; SSE-NEXT:    jmp sqrtf # TAILCALL
;
; AVX-LABEL: f:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    jb .LBB0_2
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB0_2: # %call.sqrt
; AVX-NEXT:    jmp sqrtf # TAILCALL
  %res = tail call float @sqrtf(float %val)
  ret float %res
}

; Double-precision counterpart of @f: a tail call to the sqrt libcall.
; The expected asm has the same shape as for @f: zero xmm1, compare the
; argument against it, branch on 'below' (jb) to the %call.sqrt block that
; tail-calls sqrt, and otherwise use the inline sqrtsd / vsqrtsd instruction
; in the %.split block.
define double @d(double %val) nounwind {
; SSE-LABEL: d:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    jb .LBB1_2
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
; SSE-NEXT:  .LBB1_2: # %call.sqrt
; SSE-NEXT:    jmp sqrt # TAILCALL
;
; AVX-LABEL: d:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    jb .LBB1_2
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
; AVX-NEXT:  .LBB1_2: # %call.sqrt
; AVX-NEXT:    jmp sqrt # TAILCALL
  %res = tail call double @sqrt(double %val)
  ret double %res
}

; Computes sqrt(x*x + y*y) with fast-math flags in a function marked minsize,
; using the llvm.sqrt.f64 intrinsic rather than the sqrt libcall.
; The expected asm is straight-line code in both runs: two multiplies, an add,
; and one scalar sqrt, with no compare/branch, no libcall, and no additional
; instruction emitted ahead of the sqrt.
define double @minsize(double %x, double %y) minsize {
; SSE-LABEL: minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm0, %xmm0
; SSE-NEXT:    mulsd %xmm1, %xmm1
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t3 = fmul fast double %x, %x
  %t4 = fmul fast double %y, %y
  %t5 = fadd fast double %t3, %t4
  %t6 = tail call fast double @llvm.sqrt.f64(double %t5)
  ret double %t6
}

; Partial reg avoidance may involve register allocation
; rather than adding an instruction.

; Computes sqrt(y) + y in a minsize function; %x is unused.
; The sqrt input (%y, in xmm1) is still needed by the following add, so the
; sqrt result must be written to a different register (xmm0).
; Expected asm: the SSE run emits 'sqrtsd %xmm1, %xmm0', and the AVX run emits
; 'vsqrtsd %xmm1, %xmm1, %xmm0' with xmm1 as both source operands. Neither run
; expects an extra instruction before the sqrt.
define double @partial_dep_minsize(double %x, double %y) minsize {
; SSE-LABEL: partial_dep_minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: partial_dep_minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm0
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t6 = tail call fast double @llvm.sqrt.f64(double %y)
  %t = fadd fast double %t6, %y
  ret double %t
}

; External callees used by the tests in this file.
; sqrtf and sqrt are the libm functions called by @f and @d.
declare float @sqrtf(float)
declare double @sqrt(double)
; The sqrt intrinsic called by @minsize and @partial_dep_minsize.
declare double @llvm.sqrt.f64(double)
[x86] add sqrt tests for partially-inline-libcalls (PR31455) llvm-svn: 318630 2017-11-20 01:31:37 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
[X86] Add AVX partial dependency tests as noted on D67363 llvm-svn: 371525 2019-09-10 22:28:29 +08:00			`; RUN: llc < %s -mtriple=x86_64-unknown-unknown \| FileCheck %s --check-prefixes=CHECK,SSE`
			`; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefixes=CHECK,AVX`
[x86] add sqrt tests for partially-inline-libcalls (PR31455) llvm-svn: 318630 2017-11-20 01:31:37 +08:00
			`; PR31455 - https://bugs.llvm.org/show_bug.cgi?id=31455`
			`; We have to assume that errno can be set, so we have to make a libcall in that case.`
[PartiallyInlineLibCalls][x86] add TTI hook to allow sqrt inlining to depend on arg rather than result This should fix PR31455: https://bugs.llvm.org/show_bug.cgi?id=31455 Differential Revision: https://reviews.llvm.org/D28314 llvm-svn: 319094 2017-11-28 05:15:43 +08:00			`; But it's better for perf to check that the argument is valid rather than the result of`
[x86] add sqrt tests for partially-inline-libcalls (PR31455) llvm-svn: 318630 2017-11-20 01:31:37 +08:00			`; sqrtss/sqrtsd.`
			`; Note: This is really a test of the -partially-inline-libcalls IR pass (and we have an IR test`
			`; for that), but we're checking the final asm to make sure that comes out as expected too.`

			`define float @f(float %val) nounwind {`
[X86] Add AVX partial dependency tests as noted on D67363 llvm-svn: 371525 2019-09-10 22:28:29 +08:00			`; SSE-LABEL: f:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: xorps %xmm1, %xmm1`
			`; SSE-NEXT: ucomiss %xmm1, %xmm0`
			`; SSE-NEXT: jb .LBB0_2`
			`; SSE-NEXT: # %bb.1: # %.split`
			`; SSE-NEXT: sqrtss %xmm0, %xmm0`
			`; SSE-NEXT: retq`
			`; SSE-NEXT: .LBB0_2: # %call.sqrt`
			`; SSE-NEXT: jmp sqrtf # TAILCALL`
			`;`
			`; AVX-LABEL: f:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1`
			`; AVX-NEXT: vucomiss %xmm1, %xmm0`
			`; AVX-NEXT: jb .LBB0_2`
			`; AVX-NEXT: # %bb.1: # %.split`
			`; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`; AVX-NEXT: .LBB0_2: # %call.sqrt`
			`; AVX-NEXT: jmp sqrtf # TAILCALL`
[x86] add sqrt tests for partially-inline-libcalls (PR31455) llvm-svn: 318630 2017-11-20 01:31:37 +08:00			`%res = tail call float @sqrtf(float %val)`
			`ret float %res`
			`}`

			`define double @d(double %val) nounwind {`
[X86] Add AVX partial dependency tests as noted on D67363 llvm-svn: 371525 2019-09-10 22:28:29 +08:00			`; SSE-LABEL: d:`
			`; SSE: # %bb.0:`
[X86] Add floating point execution domain to comi/ucomi/cvtss2si/cvtsd2si/cvttss2si/cvttsd2si/cvtsi2ss/cvtsi2sd instructions. 2019-12-01 03:12:07 +08:00			`; SSE-NEXT: xorpd %xmm1, %xmm1`
[X86] Add AVX partial dependency tests as noted on D67363 llvm-svn: 371525 2019-09-10 22:28:29 +08:00			`; SSE-NEXT: ucomisd %xmm1, %xmm0`
			`; SSE-NEXT: jb .LBB1_2`
			`; SSE-NEXT: # %bb.1: # %.split`
			`; SSE-NEXT: sqrtsd %xmm0, %xmm0`
			`; SSE-NEXT: retq`
			`; SSE-NEXT: .LBB1_2: # %call.sqrt`
			`; SSE-NEXT: jmp sqrt # TAILCALL`
			`;`
			`; AVX-LABEL: d:`
			`; AVX: # %bb.0:`
[X86] Add floating point execution domain to comi/ucomi/cvtss2si/cvtsd2si/cvttss2si/cvttsd2si/cvtsi2ss/cvtsi2sd instructions. 2019-12-01 03:12:07 +08:00			`; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1`
[X86] Add AVX partial dependency tests as noted on D67363 llvm-svn: 371525 2019-09-10 22:28:29 +08:00			`; AVX-NEXT: vucomisd %xmm1, %xmm0`
			`; AVX-NEXT: jb .LBB1_2`
			`; AVX-NEXT: # %bb.1: # %.split`
			`; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`; AVX-NEXT: .LBB1_2: # %call.sqrt`
			`; AVX-NEXT: jmp sqrt # TAILCALL`
[x86] add sqrt tests for partially-inline-libcalls (PR31455) llvm-svn: 318630 2017-11-20 01:31:37 +08:00			`%res = tail call double @sqrt(double %val)`
			`ret double %res`
			`}`

[x86] add test for false dependency with minsize (PR43239); NFC llvm-svn: 371433 2019-09-10 02:14:10 +08:00			`define double @minsize(double %x, double %y) minsize {`
[X86] Add AVX partial dependency tests as noted on D67363 llvm-svn: 371525 2019-09-10 22:28:29 +08:00			`; SSE-LABEL: minsize:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: mulsd %xmm0, %xmm0`
			`; SSE-NEXT: mulsd %xmm1, %xmm1`
			`; SSE-NEXT: addsd %xmm0, %xmm1`
			`; SSE-NEXT: sqrtsd %xmm1, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: minsize:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm0`
			`; AVX-NEXT: vmulsd %xmm1, %xmm1, %xmm1`
			`; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0`
			`; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
[x86] add test for false dependency with minsize (PR43239); NFC llvm-svn: 371433 2019-09-10 02:14:10 +08:00			`%t3 = fmul fast double %x, %x`
			`%t4 = fmul fast double %y, %y`
			`%t5 = fadd fast double %t3, %t4`
			`%t6 = tail call fast double @llvm.sqrt.f64(double %t5)`
			`ret double %t6`
			`}`

[x86] add a test for BreakFalseDeps; NFC As discussed in D67363 llvm-svn: 371528 2019-09-10 23:42:22 +08:00			`; Partial reg avoidance may involve register allocation`
			`; rather than adding an instruction.`

			`define double @partial_dep_minsize(double %x, double %y) minsize {`
			`; SSE-LABEL: partial_dep_minsize:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: sqrtsd %xmm1, %xmm0`
			`; SSE-NEXT: addsd %xmm1, %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: partial_dep_minsize:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm0`
			`; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%t6 = tail call fast double @llvm.sqrt.f64(double %y)`
			`%t = fadd fast double %t6, %y`
			`ret double %t`
			`}`

[x86] add sqrt tests for partially-inline-libcalls (PR31455) llvm-svn: 318630 2017-11-20 01:31:37 +08:00			`declare float @sqrtf(float)`
			`declare double @sqrt(double)`
[x86] add test for false dependency with minsize (PR43239); NFC llvm-svn: 371433 2019-09-10 02:14:10 +08:00			`declare double @llvm.sqrt.f64(double)`