llvm-project/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s

; Scalar f32 constrained fadd with round.tonearest and fpexcept.strict, both
; operands passed as ordinary (non-inreg) arguments. The checks expect a single
; v_add_f32_e32 v0, v0, v1 between the entry s_waitcnt and the return.
; NOTE(review): "constained" in the function name looks like a misspelling of
; "constrained"; it is kept as-is because the label check depends on it.
define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Scalar f32 constrained fadd with round.tonearest and fpexcept.ignore.
; The checks expect a single v_add_f32_e32 v0, v0, v1 between the entry
; s_waitcnt and the return.
define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret float %val
}

; Scalar f32 constrained fadd with round.tonearest and fpexcept.maytrap.
; The checks expect a single v_add_f32_e32 v0, v0, v1 between the entry
; s_waitcnt and the return.
define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret float %val
}

; <2 x float> constrained fadd with round.tonearest and fpexcept.strict.
; The checks expect the vector operation to be emitted as two scalar
; v_add_f32_e32 instructions, one per element (v0 with v2, v1 with v3).
define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v2
; GCN-NEXT:    v_add_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %val
}

; <2 x float> constrained fadd with round.tonearest and fpexcept.ignore.
; The checks expect two v_add_f32_e32 instructions, one per element
; (v0 with v2, v1 with v3).
define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v2
; GCN-NEXT:    v_add_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x float> %val
}

; <2 x float> constrained fadd with round.tonearest and fpexcept.maytrap.
; The checks expect two v_add_f32_e32 instructions, one per element
; (v0 with v2, v1 with v3).
define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v2
; GCN-NEXT:    v_add_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x float> %val
}

; <3 x float> constrained fadd with round.tonearest and fpexcept.strict.
; The checks expect three v_add_f32_e32 instructions, one per element
; (v0 with v3, v1 with v4, v2 with v5).
define <3 x float> @v_constained_fadd_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
; GCN-LABEL: v_constained_fadd_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v0, v0, v3
; GCN-NEXT:    v_add_f32_e32 v1, v1, v4
; GCN-NEXT:    v_add_f32_e32 v2, v2, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %val
}

; Scalar f32 constrained fadd (fpexcept.strict) in an amdgpu_ps function whose
; operands are both marked inreg. The checks expect one of the SGPR inputs (s3)
; to be copied into v0 first, followed by v_add_f32_e32 v0, s2, v0; there is no
; entry s_waitcnt or s_setpc_b64 here, only the shader-part-epilog marker.
define amdgpu_ps float @s_constained_fadd_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
; GCN-LABEL: s_constained_fadd_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_add_f32_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Strict constrained fadd whose first operand is llvm.fabs(%x).
; The checks expect the fabs to be folded into the add as the |v0| operand of
; a single v_add_f32_e64, with no separate instruction for the absolute value.
define float @v_constained_fadd_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e64 v0, |v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %val = call float @llvm.experimental.constrained.fadd.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Strict constrained fadd whose second operand is llvm.fabs(%y).
; The checks expect the fabs to be folded into the add as the |v1| operand of
; a single v_add_f32_e64, with no separate instruction for the absolute value.
define float @v_constained_fadd_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_rhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e64 v0, v0, |v1|
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Strict constrained fadd whose first operand is fneg(llvm.fabs(%x)).
; The checks expect no add at all: the output is a single
; v_sub_f32_e64 v0, v1, |v0|, i.e. the negation is absorbed by turning the add
; into a subtract with the operands swapped, and the fabs becomes the |v0|
; operand.
define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_f32_e64 v0, v1, |v0|
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %neg.fabs.x = fneg float %fabs.x
  %val = call float @llvm.experimental.constrained.fadd.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Intrinsics called by the test functions in this file: fabs for the
; operand-folding tests, and the constrained fadd at each tested width
; (f32, v2f32, v3f32).
declare float @llvm.fabs.f32(float) #1
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1
declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1

; #0 is attached to every test function (strictfp); #1 is attached to the
; intrinsic declarations above.
attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }
AMDGPU: Select strict_fadd 2020-05-23 22:41:40 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s \| FileCheck -check-prefix=GCN %s`

			`define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e32 v0, v0, v1`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")`
			`ret float %val`
			`}`

			`define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_f32_fpexcept_ignore:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e32 v0, v0, v1`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")`
			`ret float %val`
			`}`

			`define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_f32_fpexcept_maytrap:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e32 v0, v0, v1`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")`
			`ret float %val`
			`}`

			`define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_strict:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e32 v0, v0, v2`
			`; GCN-NEXT: v_add_f32_e32 v1, v1, v3`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")`
			`ret <2 x float> %val`
			`}`

			`define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e32 v0, v0, v2`
			`; GCN-NEXT: v_add_f32_e32 v1, v1, v3`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")`
			`ret <2 x float> %val`
			`}`

			`define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e32 v0, v0, v2`
			`; GCN-NEXT: v_add_f32_e32 v1, v1, v3`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")`
			`ret <2 x float> %val`
			`}`

			`define <3 x float> @v_constained_fadd_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_v3f32_fpexcept_strict:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e32 v0, v0, v3`
			`; GCN-NEXT: v_add_f32_e32 v1, v1, v4`
			`; GCN-NEXT: v_add_f32_e32 v2, v2, v5`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%val = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")`
			`ret <3 x float> %val`
			`}`

			`define amdgpu_ps float @s_constained_fadd_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {`
			`; GCN-LABEL: s_constained_fadd_f32_fpexcept_strict:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: v_mov_b32_e32 v0, s3`
			`; GCN-NEXT: v_add_f32_e32 v0, s2, v0`
			`; GCN-NEXT: ; return to shader part epilog`
			`%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")`
			`ret float %val`
			`}`

			`define float @v_constained_fadd_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_lhs:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e64 v0, \|v0\|, v1`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%fabs.x = call float @llvm.fabs.f32(float %x)`
			`%val = call float @llvm.experimental.constrained.fadd.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")`
			`ret float %val`
			`}`

			`define float @v_constained_fadd_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_rhs:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
			`; GCN-NEXT: v_add_f32_e64 v0, v0, \|v1\|`
			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%fabs.y = call float @llvm.fabs.f32(float %y)`
			`%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")`
			`ret float %val`
			`}`

			`define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {`
			`; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:`
			`; GCN: ; %bb.0:`
			`; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)`
[FPEnv] Allow fneg + strict_fadd -> strict_fsub in DAGCombiner This is the first of a set of DAGCombiner changes enabling strictfp optimizations. I want to test the waters with this to make sure changes like these are acceptable for the strictfp case - this particular change should preserve exception ordering and result precision perfectly, and many other possible changes appear to be able to as well. Copied from regular fadd combines but modified to preserve ordering via the chain, this change allows strict_fadd x, (fneg y) to become strict_fsub x, y and strict_fadd (fneg x), y to become strict_fsub y, x. Differential Revision: https://reviews.llvm.org/D85548 2020-08-27 03:17:17 +08:00			`; GCN-NEXT: v_sub_f32_e64 v0, v1, \|v0\|`
AMDGPU: Select strict_fadd 2020-05-23 22:41:40 +08:00			`; GCN-NEXT: s_setpc_b64 s[30:31]`
			`%fabs.x = call float @llvm.fabs.f32(float %x)`
			`%neg.fabs.x = fneg float %fabs.x`
			`%val = call float @llvm.experimental.constrained.fadd.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")`
			`ret float %val`
			`}`

			`declare float @llvm.fabs.f32(float) #1`
			`declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1`
			`declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1`
			`declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1`

			`attributes #0 = { strictfp }`
			`attributes #1 = { inaccessiblememonly nounwind willreturn }`