; llvm-project/llvm/test/CodeGen/X86/fp-fast.ll

; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s

; (a + a) + (a + a), algebraically 4 * a.
; The expected assembly is a single vmulss against a RIP-relative constant
; followed by the return, so the whole fadd chain has to collapse into one
; multiply. The pattern does not pin the value of that constant.
define float @test1(float %a) {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %twice = fadd float %a, %a
  %quad = fadd float %twice, %twice
  ret float %quad
}

; (4.0 * a) + (a + a), algebraically 6 * a.
; Constant is the LEFT operand of the fmul and the fmul is the LEFT operand of
; the final fadd. Expected assembly: one vmulss by a RIP-relative constant.
define float @test2(float %a) {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %scaled = fmul float 4.0, %a
  %twice = fadd float %a, %a
  %sum = fadd float %scaled, %twice
  ret float %sum
}

; (a * 4.0) + (a + a), algebraically 6 * a.
; Same shape as test2 but with the fmul operands commuted (constant on the
; RIGHT). Expected assembly: one vmulss by a RIP-relative constant.
define float @test3(float %a) {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %scaled = fmul float %a, 4.0
  %twice = fadd float %a, %a
  %sum = fadd float %scaled, %twice
  ret float %sum
}

; (a + a) + (4.0 * a), algebraically 6 * a.
; The (a + a) term is now the LEFT operand of the final fadd; the fmul keeps
; its constant on the left. Expected assembly: one vmulss by a RIP-relative
; constant.
define float @test4(float %a) {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %twice = fadd float %a, %a
  %scaled = fmul float 4.0, %a
  %sum = fadd float %twice, %scaled
  ret float %sum
}

; (a + a) + (a * 4.0), algebraically 6 * a.
; Same shape as test4 but with the fmul operands commuted (constant on the
; RIGHT). Expected assembly: one vmulss by a RIP-relative constant.
define float @test5(float %a) {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %twice = fadd float %a, %a
  %scaled = fmul float %a, 4.0
  %sum = fadd float %twice, %scaled
  ret float %sum
}

; (2.0 * a) - (a + a), algebraically zero.
; Expected assembly: no arithmetic at all, only a vxorps that zeroes xmm0
; before the return.
define float @test6(float %a) {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %doubled = fmul float 2.0, %a
  %twice = fadd float %a, %a
  %diff = fsub float %doubled, %twice
  ret float %diff
}

; (a * 2.0) - (a + a), algebraically zero.
; Same as test6 but with the fmul operands commuted (constant on the RIGHT).
; Expected assembly: a vxorps that zeroes xmm0, then the return.
define float @test7(float %a) {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %doubled = fmul float %a, 2.0
  %twice = fadd float %a, %a
  %diff = fsub float %doubled, %twice
  ret float %diff
}

; a + (a * 0.0), with the constant on the RIGHT of the fmul and the product on
; the RIGHT of the fadd.
; The expected assembly has no instruction before the return, i.e. the
; incoming value in xmm0 is handed back untouched. This depends on the
; -enable-unsafe-fp-math flag on the RUN line.
define float @test8(float %a) {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    retq
  %zeroed = fmul float %a, 0.0
  %sum = fadd float %a, %zeroed
  ret float %sum
}

; (0.0 * a) + a: the fully commuted form of test8 (constant on the LEFT of the
; fmul, product on the LEFT of the fadd).
; The expected assembly has no instruction before the return, i.e. the
; incoming value in xmm0 is handed back untouched.
define float @test9(float %a) {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    retq
  %zeroed = fmul float 0.0, %a
  %sum = fadd float %zeroed, %a
  ret float %sum
}

; a + (-0.0 - a): a value added to its own negation, with the negated term as
; the RIGHT operand of the fadd.
; Expected assembly: a vxorps that zeroes xmm0, then the return.
define float @test10(float %a) {
; CHECK-LABEL: test10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %negated = fsub float -0.0, %a
  %sum = fadd float %a, %negated
  ret float %sum
}

; (-0.0 - a) + a: a value added to its own negation, with the negated term as
; the LEFT operand of the fadd.
; This is the commuted counterpart of test10 (which has the negated term on
; the right), mirroring the way test8/test9 cover both operand orders. With
; identical operands to test10 this function would exercise nothing new.
; Expected assembly: a vxorps that zeroes xmm0, then the return.
define float @test11(float %a) {
; CHECK-LABEL: test11:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t1 = fsub float -0.0, %a
  %t2 = fadd float %t1, %a
  ret float %t2
}
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s \| FileCheck %s`
Teach the DAG combiner to turn chains of FADDs (x+x+x+x+...) into FMULs by constants. This is only enabled in unsafe FP math mode, since it does not preserve rounding effects for all such constants. llvm-svn: 162956 2012-08-31 07:35:16 +08:00
			`define float @test1(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test1:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
Teach the DAG combiner to turn chains of FADDs (x+x+x+x+...) into FMULs by constants. This is only enabled in unsafe FP math mode, since it does not preserve rounding effects for all such constants. llvm-svn: 162956 2012-08-31 07:35:16 +08:00			`%t1 = fadd float %a, %a`
			`%r = fadd float %t1, %t1`
			`ret float %r`
			`}`

			`define float @test2(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test2:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
Teach the DAG combiner to turn chains of FADDs (x+x+x+x+...) into FMULs by constants. This is only enabled in unsafe FP math mode, since it does not preserve rounding effects for all such constants. llvm-svn: 162956 2012-08-31 07:35:16 +08:00			`%t1 = fmul float 4.0, %a`
			`%t2 = fadd float %a, %a`
			`%r = fadd float %t1, %t2`
			`ret float %r`
			`}`

			`define float @test3(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test3:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
SelectionDAG: Fix incorrect condition checks in some cases of folding FADD/FMUL combinations; also improve accuracy of comments llvm-svn: 183993 2013-06-15 02:17:35 +08:00			`%t1 = fmul float %a, 4.0`
			`%t2 = fadd float %a, %a`
			`%r = fadd float %t1, %t2`
			`ret float %r`
			`}`

			`define float @test4(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test4:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
SelectionDAG: Fix incorrect condition checks in some cases of folding FADD/FMUL combinations; also improve accuracy of comments llvm-svn: 183993 2013-06-15 02:17:35 +08:00			`%t1 = fadd float %a, %a`
			`%t2 = fmul float 4.0, %a`
			`%r = fadd float %t1, %t2`
			`ret float %r`
			`}`

			`define float @test5(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test5:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
SelectionDAG: Fix incorrect condition checks in some cases of folding FADD/FMUL combinations; also improve accuracy of comments llvm-svn: 183993 2013-06-15 02:17:35 +08:00			`%t1 = fadd float %a, %a`
			`%t2 = fmul float %a, 4.0`
			`%r = fadd float %t1, %t2`
			`ret float %r`
			`}`

			`define float @test6(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test6:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
Teach the DAG combiner to turn chains of FADDs (x+x+x+x+...) into FMULs by constants. This is only enabled in unsafe FP math mode, since it does not preserve rounding effects for all such constants. llvm-svn: 162956 2012-08-31 07:35:16 +08:00			`%t1 = fmul float 2.0, %a`
			`%t2 = fadd float %a, %a`
			`%r = fsub float %t1, %t2`
			`ret float %r`
			`}`

SelectionDAG: Fix incorrect condition checks in some cases of folding FADD/FMUL combinations; also improve accuracy of comments llvm-svn: 183993 2013-06-15 02:17:35 +08:00			`define float @test7(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test7:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
SelectionDAG: Fix incorrect condition checks in some cases of folding FADD/FMUL combinations; also improve accuracy of comments llvm-svn: 183993 2013-06-15 02:17:35 +08:00			`%t1 = fmul float %a, 2.0`
			`%t2 = fadd float %a, %a`
			`%r = fsub float %t1, %t2`
			`ret float %r`
			`}`

			`define float @test8(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test8:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: retq`
Add a few more simple fast-math constant propagations and cancellations. llvm-svn: 167200 2012-11-01 10:00:53 +08:00			`%t1 = fmul float %a, 0.0`
			`%t2 = fadd float %a, %t1`
			`ret float %t2`
			`}`

SelectionDAG: Fix incorrect condition checks in some cases of folding FADD/FMUL combinations; also improve accuracy of comments llvm-svn: 183993 2013-06-15 02:17:35 +08:00			`define float @test9(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test9:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: retq`
SelectionDAG: Fix incorrect condition checks in some cases of folding FADD/FMUL combinations; also improve accuracy of comments llvm-svn: 183993 2013-06-15 02:17:35 +08:00			`%t1 = fmul float 0.0, %a`
			`%t2 = fadd float %t1, %a`
			`ret float %t2`
			`}`

			`define float @test10(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test10:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
SelectionDAG: Fix incorrect condition checks in some cases of folding FADD/FMUL combinations; also improve accuracy of comments llvm-svn: 183993 2013-06-15 02:17:35 +08:00			`%t1 = fsub float -0.0, %a`
			`%t2 = fadd float %a, %t1`
			`ret float %t2`
			`}`

			`define float @test11(float %a) {`
use update_llc_test_checks.py to tighten checking; remove unnecessary CPU param llvm-svn: 235604 2015-04-24 00:07:50 +08:00			`; CHECK-LABEL: test11:`
			`; CHECK: # BB#0:`
			`; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0`
			`; CHECK-NEXT: retq`
Add a few more simple fast-math constant propagations and cancellations. llvm-svn: 167200 2012-11-01 10:00:53 +08:00			`%t1 = fsub float -0.0, %a`
			`%t2 = fadd float %a, %t1`
			`ret float %t2`
			`}`
transform fadd chains to increase parallelism This is a compromise: with this simple patch, we should always handle a chain of exactly 3 operations optimally, but we're not generating the optimal balanced binary tree for a longer sequence. In general, this transform will reduce the dependency chain for a sequence of instructions using N operands from a worst case N-1 dependent operations to N/2 dependent operations. The optimal balanced binary tree would reduce the chain to log2(N). The trade-off for not dealing with longer sequences is: (1) we have less complexity in the compiler, (2) we avoid unknown compile-time blowup calculating a balanced tree, and (3) we don't need to worry about the increased register pressure required to parallelize longer sequences. It also seems unlikely that we would ever encounter really long strings of dependent ops like that in the wild, but I'm not sure how to verify that speculation. FWIW, I see no perf difference for test-suite running on btver2 (x86-64) with -ffast-math and this patch. We can extend this patch to cover other associative operations such as fmul, fmax, fmin, integer add, integer mul. This is a partial fix for: https://llvm.org/bugs/show_bug.cgi?id=17305 and if extended: https://llvm.org/bugs/show_bug.cgi?id=21768 https://llvm.org/bugs/show_bug.cgi?id=23116 The issue also came up in: http://reviews.llvm.org/D8941 Differential Revision: http://reviews.llvm.org/D9232 llvm-svn: 236031 2015-04-29 05:03:22 +08:00