llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll

; Codegen checks for the llvm.amdgcn.rcp intrinsic and for reciprocal-style
; fdiv / sqrt patterns. The file is compiled by llc for the amdgcn target with
; the machine verifier enabled, and llc's output is matched by FileCheck.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Target reciprocal intrinsic, f32 and f64 overloads.
declare float @llvm.amdgcn.rcp.f32(float) #0
declare double @llvm.amdgcn.rcp.f64(double) #0

; Generic square-root intrinsic, used below to build rcp(sqrt(x)) inputs.
declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0

; rcp of an undef operand: the output for this kernel must not contain a
; v_rcp_f32 instruction.
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI-NOT: v_rcp_f32
define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp of the constant 2.0: no v_rcp_f32 may appear before a v_mov_b32 that
; materializes the immediate 0.5, i.e. the reciprocal is expected to be
; computed at compile time.
; FUNC-LABEL: {{^}}rcp_2_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @rcp_2_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp of the constant 10.0: no v_rcp_f32 may appear before a v_mov_b32 that
; materializes 0x3dcccccd (the single-precision bit pattern closest to 0.1),
; which is not printed as an inline decimal like the 0.5 case.
; FUNC-LABEL: {{^}}rcp_10_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
define amdgpu_kernel void @rcp_10_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; "fdiv 1.0, x" on f32 under attribute group #1 (unsafe-fp-math=false,
; fp32-denormals disabled): expects a v_rcp_f32 reading an s-register operand,
; with the result register not mentioned again until it is stored.
; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; "fdiv 1.0, x" on f32 under attribute group #4 (unsafe-fp-math=true,
; fp32-denormals enabled): expects a v_rcp_f32 reading an s-register operand,
; with the result register not mentioned again until it is stored.
; NOTE(review): the kernel name says "safe" but group #4 sets
; unsafe-fp-math=true; confirm whether the name or the group is what was meant.
; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; "fdiv 1.0, x" on f32 under attribute group #3 (unsafe-fp-math=false,
; fp32-denormals enabled): expects a v_div_scale_f32 in the output
; (presumably part of a full division expansion rather than a bare rcp --
; TODO confirm).
; NOTE(review): the kernel name says "unsafe" but group #3 sets
; unsafe-fp-math=false; confirm whether the name or the group is what was meant.
; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp(sqrt(x)) on f32 under attribute group #1 (unsafe-fp-math=false,
; fp32-denormals disabled): the square root and the reciprocal are expected to
; stay as two separate instructions, in that order. The negative check mirrors
; the f64 variant of this test so that both state explicitly that the pair
; must not be combined into a reciprocal-square-root instruction.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
; SI-NOT: v_rsq_f32
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp(sqrt(x)) on f32 under attribute group #2 (unsafe-fp-math=true,
; fp32-denormals disabled): expects a v_rsq_f32 instruction in the output.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_rsq_f32_e32
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; Direct call to the f64 rcp intrinsic under attribute group #1
; (unsafe-fp-math=false): expects a v_rcp_f64 writing a v[N:M] register range
; from an s[N:M] operand, with that result range not mentioned again until it
; is stored.
; FUNC-LABEL: {{^}}rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; Direct call to the f64 rcp intrinsic under attribute group #2
; (unsafe-fp-math=true): same expectation as the unsafe-fp-math=false variant,
; a v_rcp_f64 whose result range is not mentioned again until it is stored.
; FUNC-LABEL: {{^}}unsafe_rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; "fdiv 1.0, x" on f64 under attribute group #1 (unsafe-fp-math=false):
; expects a v_div_scale_f64 in the output (presumably part of a full division
; expansion rather than a bare rcp -- TODO confirm).
; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
define amdgpu_kernel void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; "fdiv 1.0, x" on f64 under attribute group #2 (unsafe-fp-math=true):
; expects a v_rcp_f64 writing a v[N:M] register range from an s[N:M] operand,
; with that result range not mentioned again until it is stored.
; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; rcp(sqrt(x)) on f64 under attribute group #1 (unsafe-fp-math=false): no
; v_rsq_f64 may precede the square root, and the square root and reciprocal
; are expected as two separate instructions, in that order.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; rcp(sqrt(x)) on f64 under attribute group #2 (unsafe-fp-math=true): expects
; a single v_rsq_f64 writing a v[N:M] register range from an s[N:M] operand,
; with that result range not mentioned again until it is stored.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; Attribute groups referenced by the declarations and kernels in this file.
; #0 is attached to the intrinsic declarations.
attributes #0 = { nounwind readnone }
; #1 through #4 cover the four combinations of "unsafe-fp-math" (false/true)
; and the fp32-denormals target feature ('-' disables it, '+' enables it):
;   #1 = unsafe-fp-math false, fp32-denormals off
;   #2 = unsafe-fp-math true,  fp32-denormals off
;   #3 = unsafe-fp-math false, fp32-denormals on
;   #4 = unsafe-fp-math true,  fp32-denormals on
attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s`
R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00
AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`declare float @llvm.amdgcn.rcp.f32(float) #0`
			`declare double @llvm.amdgcn.rcp.f64(double) #0`
R600: Match rcp node on pre-SI llvm-svn: 213844 2014-07-24 14:59:24 +08:00
AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`declare double @llvm.sqrt.f64(double) #0`
			`declare float @llvm.sqrt.f32(float) #0`
R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; FUNC-LABEL: {{^}}rcp_undef_f32:`
			`; SI-NOT: v_rcp_f32`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%rcp = call float @llvm.amdgcn.rcp.f32(float undef)`
			`store float %rcp, float addrspace(1)* %out, align 4`
AMDGPU: Constant fold rcp node When doing arcp optimization with a constant denominator, this was leaving behind rcps with constant inputs. llvm-svn: 297248 2017-03-08 08:48:46 +08:00			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}rcp_2_f32:`
			`; SI-NOT: v_rcp_f32`
			`; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_2_f32(float addrspace(1)* %out) #1 {`
AMDGPU: Constant fold rcp node When doing arcp optimization with a constant denominator, this was leaving behind rcps with constant inputs. llvm-svn: 297248 2017-03-08 08:48:46 +08:00			`%rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}rcp_10_f32:`
			`; SI-NOT: v_rcp_f32`
			`; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_10_f32(float addrspace(1)* %out) #1 {`
AMDGPU: Constant fold rcp node When doing arcp optimization with a constant denominator, this was leaving behind rcps with constant inputs. llvm-svn: 297248 2017-03-08 08:48:46 +08:00			`%rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)`
			`store float %rcp, float addrspace(1)* %out, align 4`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`ret void`
			`}`
R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:`
			`; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}`
			`; SI-NOT: [[RESULT]]`
			`; SI: buffer_store_dword [[RESULT]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%rcp = fdiv float 1.0, %src`
R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:`
			`; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}`
			`; SI-NOT: [[RESULT]]`
			`; SI: buffer_store_dword [[RESULT]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%rcp = fdiv float 1.0, %src`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`
R600: Match rcp node on pre-SI llvm-svn: 213844 2014-07-24 14:59:24 +08:00
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:`
			`; SI: v_div_scale_f32`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {`
R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00			`%rcp = fdiv float 1.0, %src`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:`
			`; SI: v_sqrt_f32_e32`
			`; SI: v_rcp_f32_e32`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%sqrt = call float @llvm.sqrt.f32(float %src)`
			`%rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)`
			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:`
			`; SI: v_rsq_f32_e32`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%sqrt = call float @llvm.sqrt.f32(float %src)`
			`%rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)`
R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00			`store float %rcp, float addrspace(1)* %out, align 4`
			`ret void`
			`}`
AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00
			`; FUNC-LABEL: {{^}}rcp_f64:`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}`
			`; SI-NOT: [[RESULT]]`
			`; SI: buffer_store_dwordx2 [[RESULT]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double %src) #1 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%rcp = call double @llvm.amdgcn.rcp.f64(double %src)`
			`store double %rcp, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}unsafe_rcp_f64:`
			`; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}`
			`; SI-NOT: [[RESULT]]`
			`; SI: buffer_store_dwordx2 [[RESULT]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%rcp = call double @llvm.amdgcn.rcp.f64(double %src)`
AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`store double %rcp, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}rcp_pat_f64:`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; SI: v_div_scale_f64`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {`
AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`%rcp = fdiv double 1.0, %src`
			`store double %rcp, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:`
			`; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}`
			`; SI-NOT: [[RESULT]]`
			`; SI: buffer_store_dwordx2 [[RESULT]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%rcp = fdiv double 1.0, %src`
AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`store double %rcp, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:`
			`; SI-NOT: v_rsq_f64_e32`
			`; SI: v_sqrt_f64`
			`; SI: v_rcp_f64`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%sqrt = call double @llvm.sqrt.f64(double %src)`
			`%rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)`
			`store double %rcp, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:`
			`; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}`
			`; SI-NOT: [[RESULT]]`
			`; SI: buffer_store_dwordx2 [[RESULT]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`%sqrt = call double @llvm.sqrt.f64(double %src)`
			`%rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)`
			`store double %rcp, double addrspace(1)* %out, align 8`
AMDGPU: Fold more custom nodes to undef This will help sneak undefs past GVN into the DAG for some tests. Also add missing intrinsic for rsq_legacy, even though the node was already selected to the instruction. Also start passing the debug location to intrinsic errors. llvm-svn: 273181 2016-06-21 02:33:56 +08:00			`ret void`
			`}`

AMDGPU: Rename intrinsics to use amdgcn prefix The intrinsic target prefix should match the target name as it appears in the triple. This is not yet complete, but gets most of the important ones. llvm.AMDGPU.* intrinsics used by mesa and libclc are still handled for compatibility for now. llvm-svn: 258557 2016-01-23 05:30:34 +08:00			`attributes #0 = { nounwind readnone }`
AMDGPU: Simplify tests with per function subtargets llvm-svn: 274971 2016-07-09 15:55:03 +08:00			`attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }`
			`attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }`
			`attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }`
			`attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }`