llvm-project/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Baseline case: a plain 1.0 / x with no fast-math flags and no !fpmath
; metadata. The amdgcn runs expect a v_rcp_f32_e32 that reads the loaded
; scalar argument and whose result is stored; the r600 runs expect RECIP_IEEE.
; FUNC-LABEL: {{^}}rcp_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv float 1.0, %src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; Same 1.0 / x division, but tagged with !fpmath !0 (2.5 in this file) and no
; fast-math flags. The expected selection is identical to the untagged case:
; v_rcp_f32_e32 on amdgcn, RECIP_IEEE on r600.
; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip.ulp = fdiv float 1.0, %src, !fpmath !0
  store float %recip.ulp, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / x carrying both the per-instruction `fast` flag and !fpmath !0.
; Expected output matches the other reciprocal cases: v_rcp_f32_e32 on amdgcn
; and RECIP_IEEE on r600.
; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip.fast = fdiv fast float 1.0, %src, !fpmath !0
  store float %recip.fast, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / x carrying only the `arcp` (allow-reciprocal) flag together with
; !fpmath !0. Expected output: v_rcp_f32_e32 on amdgcn, RECIP_IEEE on r600.
; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip.arcp = fdiv arcp float 1.0, %src, !fpmath !0
  store float %recip.arcp, float addrspace(1)* %out, align 4
  ret void
}

; The division itself has no fast-math flags; unsafe math is requested through
; the function attribute group #2 ("unsafe-fp-math"="true") instead, combined
; with !fpmath !0. Expected output: v_rcp_f32_e32 on amdgcn, RECIP_IEEE on r600.
; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
  %recip = fdiv float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / fabs(x). On amdgcn the fabs is expected to be folded into the
; reciprocal as the |src| source modifier (hence the e64 encoding) rather than
; emitted separately; r600 is only checked for RECIP_IEEE.
; FUNC-LABEL: {{^}}rcp_fabs_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], |[[SRC]]|
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %abs.src = call float @llvm.fabs.f32(float %src)
  %recip = fdiv float 1.0, %abs.src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; -1.0 / x. On amdgcn the negated numerator is expected to turn into a
; reciprocal of the negated operand (v_rcp_f32_e64 with a -src modifier);
; r600 is only checked for RECIP_IEEE.
; FUNC-LABEL: {{^}}neg_rcp_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %neg.recip = fdiv float -1.0, %src
  store float %neg.recip, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / -fabs(x), with the negation written as `fsub -0.0, v`. On amdgcn both
; the fabs and the negation are expected to fold into the reciprocal as the
; combined -|src| source modifier. Only the amdgcn output has instruction
; checks here; the r600 runs just match the function label.
; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
; GCN: buffer_store_dword [[RCP]]
define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %abs.src = call float @llvm.fabs.f32(float %src)
  %neg.abs.src = fsub float -0.0, %abs.src
  %recip = fdiv float 1.0, %neg.abs.src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; -fabs(x) has two users here: the reciprocal and an fmul by x. Both users are
; expected to fold -|src| as a source modifier on amdgcn.
;
; The reciprocal and the multiply do not depend on each other, so which one
; the scheduler emits first is not something this test is about; the pair is
; matched order-independently. Both must still be found before the first
; store, and the two volatile stores are matched in program order.
; Only the amdgcn output has instruction checks here.
; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_multi_use_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN-DAG: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], [[SRC]], -|[[SRC]]|
; GCN: buffer_store_dword [[RCP]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
  %src.fabs = call float @llvm.fabs.f32(float %src)
  %src.fabs.fneg = fsub float -0.0, %src.fabs

  ; First user of -|src|: the reciprocal. Volatile so the store is kept even
  ; though the same address is written again below.
  %rcp = fdiv float 1.0, %src.fabs.fneg
  store volatile float %rcp, float addrspace(1)* %out, align 4

  ; Second user of -|src|: a multiply by the unmodified source.
  %other = fmul float %src, %src.fabs.fneg
  store volatile float %other, float addrspace(1)* %out, align 4
  ret void
}

; x / 2.0 with the `arcp` flag. The amdgcn runs expect the division by a
; constant to become a multiply by the inline constant 0.5, with no reciprocal
; instruction being checked for.
; NOTE(review): the dividend is loaded through an undef pointer — presumably
; only to obtain an opaque non-constant value; confirm this is still wanted.
; FUNC-LABEL: {{^}}div_arcp_2_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0.5, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %half = fdiv arcp float %val, 2.0
  store float %half, float addrspace(1)* %out, align 4
  ret void
}

; x / 10.0 with the `arcp` flag. The amdgcn runs expect a multiply by the
; literal 0x3dcccccd (the single-precision bit pattern of 0.1), i.e. the
; reciprocal of the constant divisor is expected to be folded at compile time.
; NOTE(review): the dividend is loaded through an undef pointer — presumably
; only to obtain an opaque non-constant value; confirm this is still wanted.
; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0x3dcccccd, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %quot = fdiv arcp float %val, 10.0
  store float %quot, float addrspace(1)* %out, align 4
  ret void
}

; x / -10.0 with the `arcp` flag. The amdgcn runs expect a multiply by the
; literal 0xbdcccccd (the single-precision bit pattern of -0.1), i.e. the sign
; is expected to be folded into the constant rather than applied separately.
; NOTE(review): the dividend is loaded through an undef pointer — presumably
; only to obtain an opaque non-constant value; confirm this is still wanted.
; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbdcccccd, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %quot = fdiv arcp float %val, -10.0
  store float %quot, float addrspace(1)* %out, align 4
  ret void
}

; Intrinsic declarations. llvm.fabs.f32 is called by the fabs-based kernels.
declare float @llvm.fabs.f32(float) #1
; NOTE(review): llvm.sqrt.f32 is not called by any kernel visible here —
; possibly a leftover declaration; confirm before removing.
declare float @llvm.sqrt.f32(float) #1

; #0: attribute group for kernels built with "unsafe-fp-math"="false".
attributes #0 = { nounwind "unsafe-fp-math"="false" }
; #1: attribute group attached to the intrinsic declarations above.
attributes #1 = { nounwind readnone }
; #2: like #0 but with "unsafe-fp-math"="true"; used by
; rcp_global_fast_ulp25_pat_f32.
attributes #2 = { nounwind "unsafe-fp-math"="true" }

; Operand of the !fpmath attachments on the *_ulp25_* divisions.
!0 = !{float 2.500000e+00}
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=FUNC %s`
Enable FeatureFlatForGlobal on Volcanic Islands This switches to the workaround that HSA defaults to for the mesa path. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> llvm-svn: 292982 2017-01-25 06:02:15 +08:00			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=FUNC %s`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s`
AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s \| FileCheck -check-prefix=EG -check-prefix=FUNC %s`

			`; FUNC-LABEL: {{^}}rcp_pat_f32:`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]`
			`; GCN: buffer_store_dword [[RCP]]`

AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`; EG: RECIP_IEEE`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {`
AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`%rcp = fdiv float 1.0, %src`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00
			`; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32:`
			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]`
			`; GCN: buffer_store_dword [[RCP]]`

			`; EG: RECIP_IEEE`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`%rcp = fdiv float 1.0, %src, !fpmath !0`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32:`
			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]`
			`; GCN: buffer_store_dword [[RCP]]`

			`; EG: RECIP_IEEE`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`%rcp = fdiv fast float 1.0, %src, !fpmath !0`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32:`
			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]`
			`; GCN: buffer_store_dword [[RCP]]`

			`; EG: RECIP_IEEE`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`%rcp = fdiv arcp float 1.0, %src, !fpmath !0`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32:`
			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]`
			`; GCN: buffer_store_dword [[RCP]]`

			`; EG: RECIP_IEEE`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`%rcp = fdiv float 1.0, %src, !fpmath !0`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}rcp_fabs_pat_f32:`
			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], \|[[SRC]]\|`
			`; GCN: buffer_store_dword [[RCP]]`

			`; EG: RECIP_IEEE`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`%src.fabs = call float @llvm.fabs.f32(float %src)`
			`%rcp = fdiv float 1.0, %src.fabs`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU: fdiv -1, x -> rcp -x llvm-svn: 277535 2016-08-03 06:25:04 +08:00			`; FUNC-LABEL: {{^}}neg_rcp_pat_f32:`
			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[SRC]]`
			`; GCN: buffer_store_dword [[RCP]]`

			`; EG: RECIP_IEEE`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {`
AMDGPU: fdiv -1, x -> rcp -x llvm-svn: 277535 2016-08-03 06:25:04 +08:00			`%rcp = fdiv float -1.0, %src`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32:`
AMDGPU: fdiv -1, x -> rcp -x llvm-svn: 277535 2016-08-03 06:25:04 +08:00			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -\|[[SRC]]\|`
			`; GCN: buffer_store_dword [[RCP]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00			`%src.fabs = call float @llvm.fabs.f32(float %src)`
			`%src.fabs.fneg = fsub float -0.0, %src.fabs`
			`%rcp = fdiv float 1.0, %src.fabs.fneg`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU: fdiv -1, x -> rcp -x llvm-svn: 277535 2016-08-03 06:25:04 +08:00			`; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_multi_use_f32:`
			`; GCN: s_load_dword [[SRC:s[0-9]+]]`
			`; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -\|[[SRC]]\|`
			`; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[SRC]], -\|[[SRC]]\|`
AMDGPU/SI: Implement a custom MachineSchedStrategy Summary: GCNSchedStrategy re-uses most of GenericScheduler, it just uses a different method to compute the excess and critical register pressure limits. It's not enabled by default, to enable it you need to pass -misched=gcn to llc. Shader DB stats: 32464 shaders in 17874 tests Totals: SGPRS: 1542846 -> 1643125 (6.50 %) VGPRS: 1005595 -> 904653 (-10.04 %) Spilled SGPRs: 29929 -> 27745 (-7.30 %) Spilled VGPRs: 334 -> 352 (5.39 %) Scratch VGPRs: 1612 -> 1624 (0.74 %) dwords per thread Code Size: 36688188 -> 37034900 (0.95 %) bytes LDS: 1913 -> 1913 (0.00 %) blocks Max Waves: 254101 -> 265125 (4.34 %) Wait states: 0 -> 0 (0.00 %) Totals from affected shaders: SGPRS: 1338220 -> 1438499 (7.49 %) VGPRS: 886221 -> 785279 (-11.39 %) Spilled SGPRs: 29869 -> 27685 (-7.31 %) Spilled VGPRs: 334 -> 352 (5.39 %) Scratch VGPRs: 1612 -> 1624 (0.74 %) dwords per thread Code Size: 34315716 -> 34662428 (1.01 %) bytes LDS: 1551 -> 1551 (0.00 %) blocks Max Waves: 188127 -> 199151 (5.86 %) Wait states: 0 -> 0 (0.00 %) Reviewers: arsenm, mareko, nhaehnle, MatzeB, atrick Subscribers: arsenm, kzhuravl, llvm-commits Differential Revision: https://reviews.llvm.org/D23688 llvm-svn: 279995 2016-08-30 03:42:52 +08:00			`; GCN: buffer_store_dword [[RCP]]`
AMDGPU: fdiv -1, x -> rcp -x llvm-svn: 277535 2016-08-03 06:25:04 +08:00			`; GCN: buffer_store_dword [[MUL]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {`
AMDGPU: fdiv -1, x -> rcp -x llvm-svn: 277535 2016-08-03 06:25:04 +08:00			`%src.fabs = call float @llvm.fabs.f32(float %src)`
			`%src.fabs.fneg = fsub float -0.0, %src.fabs`
			`%rcp = fdiv float 1.0, %src.fabs.fneg`
			`store volatile float %rcp, float addrspace(1)* %out, align 4`

			`%other = fmul float %src, %src.fabs.fneg`
			`store volatile float %other, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU: Constant fold rcp node When doing arcp optimization with a constant denominator, this was leaving behind rcps with constant inputs. llvm-svn: 297248 2017-03-08 08:48:46 +08:00			`; FUNC-LABEL: {{^}}div_arcp_2_x_pat_f32:`
			`; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0.5, v{{[0-9]+}}`
			`; GCN: buffer_store_dword [[MUL]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {`
AMDGPU: Constant fold rcp node When doing arcp optimization with a constant denominator, this was leaving behind rcps with constant inputs. llvm-svn: 297248 2017-03-08 08:48:46 +08:00			`%x = load float, float addrspace(1)* undef`
			`%rcp = fdiv arcp float %x, 2.0`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f32:`
			`; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0x3dcccccd, v{{[0-9]+}}`
			`; GCN: buffer_store_dword [[MUL]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {`
AMDGPU: Constant fold rcp node When doing arcp optimization with a constant denominator, this was leaving behind rcps with constant inputs. llvm-svn: 297248 2017-03-08 08:48:46 +08:00			`%x = load float, float addrspace(1)* undef`
			`%rcp = fdiv arcp float %x, 10.0`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f32:`
			`; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbdcccccd, v{{[0-9]+}}`
			`; GCN: buffer_store_dword [[MUL]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {`
AMDGPU: Constant fold rcp node When doing arcp optimization with a constant denominator, this was leaving behind rcps with constant inputs. llvm-svn: 297248 2017-03-08 08:48:46 +08:00			`%x = load float, float addrspace(1)* undef`
			`%rcp = fdiv arcp float %x, -10.0`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00
			`declare float @llvm.fabs.f32(float) #1`
AMDGPU: fdiv -1, x -> rcp -x llvm-svn: 277535 2016-08-03 06:25:04 +08:00			`declare float @llvm.sqrt.f32(float) #1`
AMDGPU: Use rcp for fdiv 1, x with fpmath metadata Using rcp should be OK for safe math usually, so this should not be replacing the original fdiv. llvm-svn: 276823 2016-07-27 07:25:44 +08:00
			`attributes #0 = { nounwind "unsafe-fp-math"="false" }`
			`attributes #1 = { nounwind readnone }`
			`attributes #2 = { nounwind "unsafe-fp-math"="true" }`

			`!0 = !{float 2.500000e+00}`