2018-08-31 13:49:54 +08:00
|
|
|
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
|
|
|
|
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
|
|
|
|
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
|
2017-08-12 00:42:09 +08:00
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z3sinf(
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z3cosf(
|
2017-11-05 01:37:43 +08:00
|
|
|
; GCN-PRELINK: call fast float @_Z6sincosfPf(
|
2017-08-12 00:42:09 +08:00
|
|
|
; GCN-NATIVE: tail call fast float @_Z10native_sinf(
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z10native_cosf(
|
|
|
|
define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3sinf(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
%call2 = tail call fast float @_Z3cosf(float %tmp)
|
|
|
|
%arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
store float %call2, float addrspace(1)* %arrayidx3, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z3sinf(float)
|
|
|
|
|
|
|
|
declare float @_Z3cosf(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
|
|
|
|
; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f(
|
|
|
|
; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f(
|
2017-11-05 01:37:43 +08:00
|
|
|
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
|
2017-08-12 00:42:09 +08:00
|
|
|
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f(
|
|
|
|
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f(
|
|
|
|
define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
|
|
|
|
%call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
|
|
|
|
store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
|
|
|
|
%call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
|
|
|
|
%arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
|
|
|
|
store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <2 x float> @_Z3sinDv2_f(<2 x float>)
|
|
|
|
|
|
|
|
declare <2 x float> @_Z3cosDv2_f(<2 x float>)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
|
|
|
|
; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f(
|
|
|
|
; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f(
|
2017-11-05 01:37:43 +08:00
|
|
|
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
|
2017-08-12 00:42:09 +08:00
|
|
|
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f(
|
|
|
|
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f(
|
|
|
|
define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
|
|
|
|
%loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
|
|
|
|
%extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
|
|
%call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
|
|
|
|
%extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
|
|
|
store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
|
|
|
|
%call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
|
|
|
|
%arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
|
|
|
|
%extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
|
|
|
%storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
|
|
|
|
store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <3 x float> @_Z3sinDv3_f(<3 x float>)
|
|
|
|
|
|
|
|
declare <3 x float> @_Z3cosDv3_f(<3 x float>)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
|
|
|
|
; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f(
|
|
|
|
; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f(
|
2017-11-05 01:37:43 +08:00
|
|
|
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
|
2017-08-12 00:42:09 +08:00
|
|
|
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f(
|
|
|
|
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f(
|
|
|
|
define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
|
|
|
|
%call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
|
|
|
|
store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
|
|
|
|
%call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
|
|
|
|
%arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
|
|
|
|
store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <4 x float> @_Z3sinDv4_f(<4 x float>)
|
|
|
|
|
|
|
|
declare <4 x float> @_Z3cosDv4_f(<4 x float>)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
|
|
|
|
; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f(
|
|
|
|
; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f(
|
2017-11-05 01:37:43 +08:00
|
|
|
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
|
2017-08-12 00:42:09 +08:00
|
|
|
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f(
|
|
|
|
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f(
|
|
|
|
define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
|
|
|
|
%call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
|
|
|
|
store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
|
|
|
|
%call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
|
|
|
|
%arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
|
|
|
|
store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <8 x float> @_Z3sinDv8_f(<8 x float>)
|
|
|
|
|
|
|
|
declare <8 x float> @_Z3cosDv8_f(<8 x float>)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
|
|
|
|
; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f(
|
|
|
|
; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f(
|
2017-11-05 01:37:43 +08:00
|
|
|
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
|
2017-08-12 00:42:09 +08:00
|
|
|
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f(
|
|
|
|
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f(
|
|
|
|
define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
|
|
|
|
%call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
|
|
|
|
store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
|
|
|
|
%call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
|
|
|
|
%arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
|
|
|
|
store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <16 x float> @_Z3sinDv16_f(<16 x float>)
|
|
|
|
|
|
|
|
declare <16 x float> @_Z3cosDv16_f(<16 x float>)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
|
|
|
|
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
|
|
|
|
define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z12native_recipf(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
|
|
|
|
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
|
|
|
|
define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z10half_recipf(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
|
|
|
|
; GCN: fmul fast float %tmp, 0x3FD5555560000000
|
|
|
|
define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z13native_divideff(float, float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
|
|
|
|
; GCN: fmul fast float %tmp, 0x3FD5555560000000
|
|
|
|
define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z11half_divideff(float, float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
|
|
|
|
; GCN: store float 1.000000e+00, float addrspace(1)* %a
|
|
|
|
define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z3powff(float, float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
|
|
|
|
; GCN: store float 1.000000e+00, float addrspace(1)* %a
|
|
|
|
define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
|
|
|
|
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
; GCN: store float %tmp, float addrspace(1)* %a, align 4
|
|
|
|
define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
|
|
|
|
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
; GCN: store float %tmp, float addrspace(1)* %a, align 4
|
|
|
|
define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
|
|
|
|
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
; GCN: %__pow2 = fmul fast float %tmp, %tmp
|
|
|
|
define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
|
|
|
|
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
; GCN: %__pow2 = fmul fast float %tmp, %tmp
|
|
|
|
define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
|
|
|
|
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
|
|
|
|
define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
|
|
|
|
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
|
|
|
|
define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
|
|
|
|
; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
|
|
|
|
; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
|
|
|
|
; GCN: %__powx2 = fmul fast float %tmp, %tmp
|
|
|
|
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
|
|
|
|
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
|
2017-09-06 08:30:27 +08:00
|
|
|
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
|
|
|
|
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
|
2017-08-12 00:42:09 +08:00
|
|
|
define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
|
|
|
|
; GCN: %__powx2 = fmul fast float %tmp, %tmp
|
|
|
|
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
|
|
|
|
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
|
2017-09-06 08:30:27 +08:00
|
|
|
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
|
|
|
|
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
|
2017-08-12 00:42:09 +08:00
|
|
|
define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z4powrff(float, float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
|
|
|
|
; GCN: %__powx2 = fmul fast float %tmp, %tmp
|
|
|
|
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
|
|
|
|
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
|
2017-09-06 08:30:27 +08:00
|
|
|
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
|
|
|
|
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
|
2017-08-12 00:42:09 +08:00
|
|
|
define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z4pownfi(float %tmp, i32 11)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z4pownfi(float, i32)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
|
|
|
|
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
|
|
|
|
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
|
|
|
|
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
|
|
|
|
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
|
2017-09-06 08:30:27 +08:00
|
|
|
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
|
|
|
|
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
|
|
|
|
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
|
|
|
|
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
|
|
|
|
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
|
|
|
|
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
|
2017-08-12 00:42:09 +08:00
|
|
|
define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1)
|
|
|
|
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)
|
|
|
|
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
|
|
|
|
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
|
|
|
|
; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
|
|
|
|
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
|
|
|
|
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
|
|
|
|
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
|
|
|
|
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
|
|
|
|
define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
|
|
|
|
%call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
|
|
|
|
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
|
|
|
|
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
|
|
|
|
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
|
|
|
|
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
|
|
|
|
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
|
|
|
|
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
|
|
|
|
; GCN-PRELINK: %__yeven = shl i32 %conv, 31
|
2017-09-06 08:30:27 +08:00
|
|
|
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
|
|
|
|
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
|
|
|
|
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
|
|
|
|
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
|
|
|
|
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
|
|
|
|
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
|
2017-08-12 00:42:09 +08:00
|
|
|
define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
|
|
|
|
%conv = fptosi float %tmp1 to i32
|
|
|
|
%call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
|
|
|
|
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
; GCN: store float %tmp, float addrspace(1)* %a, align 4
|
|
|
|
define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%call = tail call fast float @_Z5rootnfi(float %tmp, i32 1)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z5rootnfi(float, i32)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2)
|
|
|
|
; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z5rootnfi(float %tmp, i32 2)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3)
|
|
|
|
; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z5rootnfi(float %tmp, i32 3)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
|
|
|
|
; GCN: fdiv fast float 1.000000e+00, %tmp
|
|
|
|
define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
|
|
|
|
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
|
|
|
|
; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
|
|
|
|
; GCN: store float %y, float addrspace(1)* %a
|
|
|
|
define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z3fmafff(float, float, float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
|
|
|
|
; GCN: store float %y, float addrspace(1)* %a
|
|
|
|
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
|
|
|
|
; GCN: store float %y, float addrspace(1)* %a
|
|
|
|
define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z3madfff(float, float, float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
|
|
|
|
; GCN: store float %y, float addrspace(1)* %a
|
|
|
|
define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
|
|
|
|
; GCN: %fmaadd = fadd fast float %tmp, %y
|
|
|
|
define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
|
|
|
|
; GCN: %fmaadd = fadd fast float %tmp, %y
|
|
|
|
define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
|
|
|
|
; GCN: %fmamul = fmul fast float %tmp1, %tmp
|
|
|
|
define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp = load float, float addrspace(1)* %arrayidx, align 4
|
|
|
|
%tmp1 = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3expf(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z3expf(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z4exp2f(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z4exp2f(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z5exp10f(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z5exp10f(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3logf(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z3logf(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z4log2f(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z4log2f(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z5log10f(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z5log10f(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
|
|
|
|
; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
|
|
|
|
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
|
|
|
|
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
|
|
|
|
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
|
|
|
|
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
|
|
|
|
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
|
|
|
%tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
|
|
|
|
%call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z4sqrtf(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-08-29 02:00:08 +08:00
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
|
|
|
|
; GCN: tail call fast double @_Z4sqrtd(double %tmp)
|
|
|
|
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load double, double addrspace(1)* %a, align 8
|
|
|
|
%call = tail call fast double @_Z4sqrtd(double %tmp)
|
|
|
|
store double %call, double addrspace(1)* %a, align 8
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-08-12 00:42:09 +08:00
|
|
|
declare float @_Z4sqrtf(float)
|
2017-08-29 02:00:08 +08:00
|
|
|
declare double @_Z4sqrtd(double)
|
2017-08-12 00:42:09 +08:00
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z5rsqrtf(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z5rsqrtf(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
|
|
|
|
; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%call = tail call fast float @_Z3tanf(float %tmp)
|
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare float @_Z3tanf(float)
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
|
|
|
|
; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp)
|
|
|
|
; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp)
|
|
|
|
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
|
|
|
|
entry:
|
|
|
|
%tmp = load float, float addrspace(1)* %a, align 4
|
|
|
|
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
|
|
|
|
%call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
|
2017-08-12 00:42:09 +08:00
|
|
|
store float %call, float addrspace(1)* %a, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare float @_Z6sincosfPf(float, float*)
|
2017-09-06 08:30:27 +08:00
|
|
|
|
|
|
|
%opencl.pipe_t = type opaque
|
|
|
|
%opencl.reserve_id_t = type opaque
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
|
2017-11-13 07:53:44 +08:00
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
|
2017-09-06 08:30:27 +08:00
|
|
|
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
|
|
|
|
entry:
|
|
|
|
%tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
|
|
|
|
%tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
|
|
|
|
%tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
|
|
|
|
%tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
|
|
|
|
tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
|
2017-09-06 08:30:27 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32)
|
2017-09-06 08:30:27 +08:00
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)
|
2017-09-06 08:30:27 +08:00
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32)
|
2017-09-06 08:30:27 +08:00
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)
|
2017-09-06 08:30:27 +08:00
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
|
2017-11-13 07:53:44 +08:00
|
|
|
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
|
2017-09-06 08:30:27 +08:00
|
|
|
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
|
|
|
|
entry:
|
|
|
|
%tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
|
|
|
|
%tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
|
|
|
|
%tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
|
|
|
|
%tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
|
|
|
|
tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr
|
2017-09-06 08:30:27 +08:00
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr
|
2017-09-06 08:30:27 +08:00
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr
|
2017-09-06 08:30:27 +08:00
|
|
|
|
2017-11-05 01:37:43 +08:00
|
|
|
declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr
|
2017-09-06 08:30:27 +08:00
|
|
|
|
|
|
|
%struct.S = type { [100 x i32] }
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
|
2017-11-13 07:53:44 +08:00
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]]
|
|
|
|
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
|
2017-09-06 08:30:27 +08:00
|
|
|
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
|
|
|
|
entry:
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
|
|
|
|
%tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
%tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
|
|
|
|
%tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
%tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8*
|
|
|
|
%tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
%tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8*
|
|
|
|
%tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
%tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8*
|
|
|
|
%tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
%tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8*
|
|
|
|
%tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
%tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8*
|
|
|
|
%tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
%tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8*
|
|
|
|
%tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
%tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
|
2017-11-05 01:37:43 +08:00
|
|
|
%tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8*
|
|
|
|
%tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
|
2017-09-06 08:30:27 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
[AMDGPU] Fix discarded result of addAttribute
Summary:
`llvm::AttributeList` and `llvm::AttributeSet` are immutable, and so methods
defined on these classes, such as `addAttribute`, return a new immutable
object with the attribute added. In https://reviews.llvm.org/D55217 I attempted
to annotate methods such as `addAttribute` with `LLVM_NODISCARD`, since
calling these methods has no side-effects, and so ignoring the result
that is returned is almost certainly a programmer error.
However, committing the change resulted in new warnings in the AMDGPU target.
The AMDGPU simplify libcalls pass added in https://reviews.llvm.org/D36436
attempts to add the readonly and nounwind attributes to simplified
library functions, but instead calls the `addAttribute` methods and
ignores the result.
Modify the simplify libcalls pass to actually add the nounwind and
readonly attributes. Also update the simplify libcalls test to assert
that these attributes are actually being set.
Reviewers: rampitec, vpykhtin, rnk
Reviewed By: rampitec
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D55435
llvm-svn: 348732
2018-12-10 05:56:50 +08:00
|
|
|
; GCN-PRELINK: declare float @_Z4fabsf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]]
|
|
|
|
; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]
|
|
|
|
; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]
|
|
|
|
|
2017-11-13 07:53:44 +08:00
|
|
|
; CGN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
|
[AMDGPU] Fix discarded result of addAttribute
Summary:
`llvm::AttributeList` and `llvm::AttributeSet` are immutable, and so methods
defined on these classes, such as `addAttribute`, return a new immutable
object with the attribute added. In https://reviews.llvm.org/D55217 I attempted
to annotate methods such as `addAttribute` with `LLVM_NODISCARD`, since
calling these methods has no side-effects, and so ignoring the result
that is returned is almost certainly a programmer error.
However, committing the change resulted in new warnings in the AMDGPU target.
The AMDGPU simplify libcalls pass added in https://reviews.llvm.org/D36436
attempts to add the readonly and nounwind attributes to simplified
library functions, but instead calls the `addAttribute` methods and
ignores the result.
Modify the simplify libcalls pass to actually add the nounwind and
readonly attributes. Also update the simplify libcalls test to assert
that these attributes are actually being set.
Reviewers: rampitec, vpykhtin, rnk
Reviewed By: rampitec
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D55435
llvm-svn: 348732
2018-12-10 05:56:50 +08:00
|
|
|
; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nounwind readonly }
|
2017-09-06 08:30:27 +08:00
|
|
|
attributes #0 = { nounwind }
|