; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,VI-FLUSH,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s

; Canonicalize applied directly to a loaded value. The checks expect a
; canonicalizing instruction to remain: a v_mul by 1.0 in the runs that flush
; f32 denormals, and a v_max in the gfx900 run with f32 denormals enabled.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-DENORM: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an fmul result. The checks expect the v_mul result to be
; stored directly, and no "1.0" text to appear after the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fmul float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.amdgcn.fmul.legacy result. The checks expect no
; v_mul/v_max between the legacy multiply and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_legacy_value_f32:
; GCN: v_mul_legacy_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fmul_legacy_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.amdgcn.fmul.legacy(float %load, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an fsub result. The checks expect no v_mul/v_max between
; the v_sub and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float 15.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an fadd result. The checks expect no v_mul/v_max between
; the v_add and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fadd float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.sqrt result. The checks expect no v_mul/v_max
; between the v_sqrt and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.sqrt.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.ceil result. The checks expect no v_mul/v_max
; between the v_ceil and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fceil_value_f32:
; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.ceil.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.floor result. The checks expect no v_mul/v_max
; between the v_floor and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_floor_value_f32:
; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.floor.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.fma result. The checks expect the constant to be
; materialized once in an SGPR and used for both fma operands, with no
; v_mul/v_max between the v_fma and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fma_value_f32:
; GCN: s_mov_b32 [[SREG:s[0-9]+]], 0x41700000
; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, [[SREG]], [[SREG]]
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.amdgcn.fmad.ftz result. The checks expect no
; v_mul/v_max between the v_mad and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmad_ftz_value_f32:
; GCN: s_mov_b32 [[SGPR:s[0-9]+]], 0x41700000
; GCN: v_mad_f32 [[V:v[0-9]+]], v{{[0-9]+}}, [[SGPR]], [[SGPR]]
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fmad_ftz_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.amdgcn.fmad.ftz.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.fmuladd result. The checks expect a v_mac in the
; runs that flush f32 denormals and a v_fma in the runs that keep them. In
; both cases no v_mul/v_max should appear before the store of the result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmuladd_value_f32:
; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM: s_mov_b32 [[SREG:s[0-9]+]], 0x41700000
; GCN-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, [[SREG]], [[SREG]]
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Two back-to-back canonicalize calls on a loaded value. The checks expect a
; single canonicalizing instruction (v_mul by 1.0 or v_max of the value with
; itself) and no further v_mul/v_max before the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_canonicalize_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
; GCN-FLUSH: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]]
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.canonicalize.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize (f64) of an fpext from float. The checks expect no v_mul/v_max
; between the conversion and the 64-bit store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpextend_value_f64_f32:
; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fpext float %load to double
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; Canonicalize (f32) of an fpext from half. The checks expect no v_mul/v_max
; between the conversion and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpextend_value_f32_f16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; Same half-to-float fpext + canonicalize pattern, compiled with attribute
; group #2. The checks expect no v_mul/v_max between the conversion and the
; store of its result.
; NOTE(review): attribute group #2 is defined outside this view; going by the
; test name it presumably selects f16 denormal flushing - confirm.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(half addrspace(1)* %arg, float addrspace(1)* %out) #2 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; Canonicalize (f32) of an fptrunc from double. The checks expect no
; v_mul/v_max between the conversion and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v = fptrunc double %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; Canonicalize (f16) of an fptrunc from float. The checks expect no
; v_max/v_mul between the conversion and the 16-bit store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_max
; GCN-NOT: v_mul
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; Same float-to-half fptrunc + canonicalize pattern, compiled with attribute
; group #2. The checks expect no v_max/v_mul between the conversion and the
; 16-bit store of its result.
; NOTE(review): attribute group #2 is defined outside this view; going by the
; test name it presumably selects f16 denormal flushing - confirm.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpround_value_f16_f32_flushf16:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_max
; GCN-NOT: v_mul
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(float addrspace(1)* %arg, half addrspace(1)* %out) #2 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; Canonicalize (<2 x half>) of a vector fptrunc from <2 x float>. The checks
; expect the two converted halves to be packed (v_or on gfx801, v_and +
; v_lshl_or on gfx900) and stored, with no v_mul/v_max before the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %arg, i32 %id
  %load = load <2 x float>, <2 x float> addrspace(1)* %gep, align 8
  %v = fptrunc <2 x float> %load to <2 x half>
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 4
  ret void
}

; Canonicalize of a negated (fsub -0.0, x) loaded value. The checks expect a
; canonicalizing instruction to remain, with the negation folded into it:
; v_mul by -1.0, or v_max with negated source operands.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_fneg_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float -0.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of a negated fadd result (rather than a raw load). The checks
; expect the sign flip as a v_xor and no v_mul/v_max before the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_fneg_value_f32:
; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = fsub float -0.0, %v0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of llvm.fabs applied to a loaded value. The checks expect a
; canonicalizing instruction to remain, with the abs folded into it as a
; source modifier.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_fabs_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; The checks expect exactly one canonicalizing instruction with an abs source
; modifier and no further v_mul_/v_max_ afterwards.
; NOTE(review): %copysign (and therefore %canon.load) is never used; the
; value that is canonicalized and stored is fabs(%load), which makes this IR
; equivalent to the plain fabs test. Presumably %v was meant to be derived
; from %copysign. Left unchanged here because the expected check lines would
; have to be regenerated with llc - confirm intent before changing.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_fcopysign_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
; GCN-NOT: v_mul_
; GCN-NOT: v_max_
define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(float addrspace(1)* %arg, float %sign) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %canon.load = tail call float @llvm.canonicalize.f32(float %load)
  %copysign = call float @llvm.copysign.f32(float %canon.load, float %sign)
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of llvm.fabs applied to an fadd result (rather than a raw
; load). The checks expect the abs as a v_and with 0x7fffffff and no
; v_mul/v_max before the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_fabs_value_f32:
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.fabs.f32(float %v0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.sin result. The checks expect no v_mul/v_max
; between the v_sin and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_sin_value_f32:
; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.sin.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of an llvm.cos result. The checks expect no v_mul/v_max
; between the v_cos and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_cos_value_f32:
; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.cos.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize (f16) of a half-precision llvm.sin result. The checks expect an
; f32 v_sin whose result is converted to f16 and stored, with no v_mul/v_max
; between the conversion and the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_sin_value_f16:
; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.sin.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; Canonicalize (f16) of a half-precision llvm.cos result. The checks expect an
; f32 v_cos whose result is converted to f16 and stored, with no v_mul/v_max
; between the conversion and the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_cos_value_f16:
; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.cos.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; Canonicalize of a constant quiet NaN. The checks expect the result to be
; materialized as the immediate 0x7fc00000 and stored, with no v_mul/v_max.
; GCN-LABEL: {{^}}test_fold_canonicalize_qNaN_value_f32:
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of minnum(loaded value, 0.0). The checks expect the loaded
; value to be quieted first (v_mul by 1.0, or v_max of the value with itself),
; then a v_min on the quieted value, and no v_max/v_mul after that. The store
; of the v_min result is only checked in the gfx900 runs.
; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode:
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
; GCN-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
; GCN-DENORM: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, [[QUIET]]

; GCN-NOT: v_max
; GCN-NOT: v_mul

; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Same minnum(loaded value, 0.0) + canonicalize pattern, compiled with
; attribute group #1. In the runs that keep f32 denormals the checks expect no
; v_max/v_mul on either side of the v_min; the gfx801 flush run still expects
; a v_mul by 1.0.
; NOTE(review): attribute group #1 is defined outside this view; going by the
; test name it presumably enables no-NaNs FP math - confirm.
; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_from_load_f32_nnan_ieee_mode:
; VI-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GCN-DENORM-NOT: v_max
; GCN-DENORM-NOT: v_mul

; GCN: v_min_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; GCN-DENORM-NOT: v_max
; GCN-DENORM-NOT: v_mul

; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_nnan_ieee_mode(float addrspace(1)* %arg) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; Canonicalize of minnum(fadd result, 0.0). The checks expect no v_mul/v_max
; between the v_min and the store of its result.
; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_f32:
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.

; minnum of a loaded value with a constant whose bit pattern is 0x7F800001
; (a signaling NaN), followed by canonicalize. The checks only expect the
; loaded value itself to be canonicalized (v_mul by 1.0, or v_max of the value
; with itself); no v_min is checked.
; GCN-LABEL: {{^}}test_fold_canonicalize_sNaN_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; minnum of a loaded value with a constant whose bit pattern is 0x007FFFFF
; (a denormal), followed by canonicalize. Expected output differs per run:
;  - denormals flushed: the load is quieted with v_mul by 1.0 and the
;    constant operand of v_min becomes 0;
;  - gfx900 with denormals: the load is quieted with v_max and v_min keeps
;    the 0x7fffff constant;
;  - gfx801 with denormals: v_min is applied to the loaded value directly.
; In every run no v_mul/v_max may follow the v_min before the store.
; GCN-LABEL: {{^}}test_fold_canonicalize_denorm_value_f32:
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]

; GFX9-DENORM: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
; GFX9-DENORM: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, [[QUIET]]

; GFX9-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
; GFX9-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET]]

; VI-FLUSH: v_mul_f32_e32 [[QUIET_V0:v[0-9]+]], 1.0, [[VAL]]
; VI-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_V0]]

; VI-DENORM: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, [[VAL]]

; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; maxnum applied directly to a loaded value (no intervening FP op)
; against 0.0, followed by canonicalize.
; GCN-LABEL: {{^}}test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode:
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]

; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[VAL]]

; VI-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
; VI-FLUSH: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET]]

; VI-DENORM: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[VAL]]

; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
|
|
|
|
|
|
|
|
; maxnum of (load + 0.0) against 0.0, followed by canonicalize.
; GCN-LABEL: {{^}}test_fold_canonicalize_maxnum_value_f32:
; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN-NOT: v_max
; GCN-NOT: v_mul
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  ; The fadd feeds maxnum with the result of an FP operation rather
  ; than the raw loaded bits.
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
|
|
|
|
|
|
|
|
; f64 variant: maxnum of (load + 0.0) against 0.0, followed by canonicalize.
; GCN-LABEL: {{^}}test_fold_canonicalize_maxnum_value_f64:
; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  ; The fadd feeds maxnum with the result of an FP operation rather
  ; than the raw loaded bits.
  %v0 = fadd double %load, 0.0
  %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  store double %canonicalized, double addrspace(1)* %gep, align 8
  ret void
}
|
|
|
|
|
2018-08-07 05:38:27 +08:00
|
|
|
; amdgpu_ps shader: canonicalize of (arg * 15.0). The multiply constant
; appears in the checks as 0x41700000, the f32 encoding of 15.0.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN-NEXT: ; return
define amdgpu_ps float @test_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}
|
|
|
|
|
|
|
|
; amdgpu_ps shader: canonicalize of (arg * 15.0) where the fmul carries
; the nnan fast-math flag.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN-NEXT: ; return
define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul nnan float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}
|
|
|
|
|
2018-08-07 05:38:27 +08:00
|
|
|
; GCN-LABEL: {{^}}test_fold_canonicalize_fdiv_value_f32_no_ieee:
|
|
|
|
; GCN: v_div_fixup_f32
|
|
|
|
; GCN-NOT: v_max
|
|
|
|
; GCN-NOT: v_mul
|
|
|
|
; GCN: ; return
|
|
|
|
; amdgpu_ps shader returning canonicalize(15.0 / arg0).
define amdgpu_ps float @test_fold_canonicalize_fdiv_value_f32_no_ieee(float %arg0) {
entry:
  %quot = fdiv float 15.0, %arg0
  %canon = tail call float @llvm.canonicalize.f32(float %quot)
  ret float %canon
}
|
|
|
|
|
2017-07-14 07:59:15 +08:00
|
|
|
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
|
2017-07-29 09:03:53 +08:00
|
|
|
; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
|
|
|
|
; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]]
|
2017-07-14 07:59:15 +08:00
|
|
|
; GFX9-DENORM-NOT: 1.0
|
|
|
|
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
|
|
|
|
; Canonicalize a value loaded from arg[id] and store it to out[id].
; The function carries attribute group #1.
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
  %val = load float, float addrspace(1)* %src, align 4
  %canon = tail call float @llvm.canonicalize.f32(float %val)
  %dst = getelementptr inbounds float, float addrspace(1)* %out, i32 %tid
  store float %canon, float addrspace(1)* %dst, align 4
  ret void
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
|
2017-07-29 09:03:53 +08:00
|
|
|
; GCN: {{flat|global}}_load_dwordx2 [[V:v\[[0-9:]+\]]],
|
|
|
|
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
|
2018-08-07 05:38:27 +08:00
|
|
|
; GCN-NOT: v_mul_
|
|
|
|
; GCN-NOT: v_max_
|
2017-07-14 07:59:15 +08:00
|
|
|
; f64 variant: canonicalize a value loaded from arg[id] and store it to
; out[id]. The function carries attribute group #1.
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds double, double addrspace(1)* %arg, i32 %tid
  %val = load double, double addrspace(1)* %src, align 8
  %canon = tail call double @llvm.canonicalize.f64(double %val)
  %dst = getelementptr inbounds double, double addrspace(1)* %out, i32 %tid
  store double %canon, double addrspace(1)* %dst, align 8
  ret void
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
|
2017-07-29 09:03:53 +08:00
|
|
|
; GCN: {{flat|global}}_load_ushort [[V:v[0-9]+]],
|
2018-08-07 05:38:27 +08:00
|
|
|
; GCN-NOT: v_mul
|
|
|
|
; GCN-NOT: v_max
|
|
|
|
; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V]]
|
2017-07-14 07:59:15 +08:00
|
|
|
; f16 variant: canonicalize a value loaded from arg[id] and store it to
; out[id]. The function carries attribute group #1.
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %src = getelementptr inbounds half, half addrspace(1)* %arg, i32 %tid
  %val = load half, half addrspace(1)* %src, align 2
  %canon = tail call half @llvm.canonicalize.f16(half %val)
  %dst = getelementptr inbounds half, half addrspace(1)* %out, i32 %tid
  store half %canon, half addrspace(1)* %dst, align 2
  ret void
}
|
|
|
|
|
2018-08-07 05:38:27 +08:00
|
|
|
; GCN-LABEL: {{^}}test_fold_canonicalize_select_value_f32:
|
|
|
|
; GCN: v_add_f32
|
|
|
|
; GCN: v_add_f32
|
|
|
|
; GCN: v_cndmask_b32
|
|
|
|
; GCN-NOT: v_mul_
|
|
|
|
; GCN-NOT: v_max_
|
|
|
|
; Canonicalize the result of a select between two fadd results.
; Both float inputs come from volatile loads of the same element; the
; select condition comes from a volatile i32 load through an undef pointer.
define amdgpu_kernel void @test_fold_canonicalize_select_value_f32(float addrspace(1)* %arg) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
  %a = load volatile float, float addrspace(1)* %slot, align 4
  %b = load volatile float, float addrspace(1)* %slot, align 4
  %sel.src = load volatile i32, i32 addrspace(1)* undef, align 4
  %a.add = fadd float %a, 15.0
  %b.add = fadd float %b, 32.0
  %is.zero = icmp eq i32 %sel.src, 0
  %picked = select i1 %is.zero, float %a.add, float %b.add
  %canon = tail call float @llvm.canonicalize.f32(float %picked)
  store float %canon, float addrspace(1)* %slot, align 4
  ret void
}
|
|
|
|
|
|
|
|
; Need to quiet the nan with a separate instruction since it will be
|
|
|
|
; passed through the minnum.
|
2018-10-23 00:27:27 +08:00
|
|
|
; FIXME: canonicalize doesn't work correctly without ieee_mode
|
2018-08-07 05:38:27 +08:00
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_no_ieee_mode:
|
2018-10-23 00:27:27 +08:00
|
|
|
; GFX9-NOT: v0
|
|
|
|
; GFX9-NOT: v1
|
2018-08-07 05:38:27 +08:00
|
|
|
; GFX9: v_min_f32_e32 v0, v0, v1
|
|
|
|
; GFX9-NEXT: ; return to shader
|
|
|
|
|
2018-10-23 00:27:27 +08:00
|
|
|
; VI-FLUSH: v_min_f32_e32 v0, v0, v1
|
|
|
|
; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
|
|
; VI-FLUSH-NEXT: ; return
|
|
|
|
|
|
|
|
; VI-DENORM-NOT: v0
|
|
|
|
; VI-DENORM: v_min_f32_e32 v0, v0, v1
|
|
|
|
; VI-DENORM-NEXT: ; return
|
2018-08-07 05:38:27 +08:00
|
|
|
; amdgpu_ps shader returning canonicalize(minnum(arg0, arg1)); both
; minnum operands are raw function arguments.
define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode(float %arg0, float %arg1) {
  %min = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
  %canon = tail call float @llvm.canonicalize.f32(float %min)
  ret float %canon
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_ieee_mode:
|
|
|
|
; GFX9: v_min_f32_e32 v0, v0, v1
|
|
|
|
; GFX9-NEXT: s_setpc_b64
|
|
|
|
|
2018-10-23 00:27:27 +08:00
|
|
|
; VI-FLUSH-DAG: v_mul_f32_e32 v0, 1.0, v0
|
|
|
|
; VI-FLUSH-DAG: v_mul_f32_e32 v1, 1.0, v1
|
|
|
|
; VI-FLUSH: v_min_f32_e32 v0, v0, v1
|
|
|
|
|
|
|
|
; VI-DENORM-DAG: v_max_f32_e32 v0, v0, v0
|
|
|
|
; VI-DENORM-DAG: v_max_f32_e32 v1, v1, v1
|
|
|
|
; VI-DENORM: v_min_f32_e32 v0, v0, v1
|
|
|
|
|
2018-08-07 05:38:27 +08:00
|
|
|
; VI-NEXT: s_setpc_b64
|
|
|
|
; Same computation as the amdgpu_ps variant, canonicalize(minnum(arg0, arg1)),
; but defined with the default calling convention.
define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %arg1) {
  %min = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
  %canon = tail call float @llvm.canonicalize.f32(float %min)
  ret float %canon
}
|
|
|
|
|
|
|
|
; Canonicalizing flush necessary pre-gfx9
|
|
|
|
; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
|
|
|
|
; GCN: v_min_f32_e32 v0, v0, v1
|
|
|
|
; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
|
|
|
|
; GCN-NEXT: ; return
|
|
|
|
; amdgpu_ps shader returning canonicalize(minnum(arg0, arg1)), with
; attribute group #1 applied to the function.
define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(float %arg0, float %arg1) #1 {
  %min = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
  %canon = tail call float @llvm.canonicalize.f32(float %min)
  ret float %canon
}
|
|
|
|
|
2018-08-07 06:45:51 +08:00
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_v2f16:
|
|
|
|
; GFX9-DAG: v_add_f16_e32
|
|
|
|
; GFX9-DAG: v_mul_f16_e32
|
|
|
|
; GFX9-NOT: v_max
|
|
|
|
; GFX9-NOT: v_pk_max
|
|
|
|
; Build a <2 x half> from an fadd result (lane 0) and an fmul result
; (lane 1), then canonicalize the whole vector.
define <2 x half> @v_test_canonicalize_build_vector_v2f16(<2 x half> %vec) {
  %elt0 = extractelement <2 x half> %vec, i32 0
  ; NOTE(review): lane 1 is extracted but never used; the multiply below
  ; reads lane 0. Possibly unintended, but left as is because switching
  ; the operand would change the generated code the checks expect.
  %elt1 = extractelement <2 x half> %vec, i32 1
  %add0 = fadd half %elt0, 1.0
  %mul1 = fmul half %elt0, 4.0
  %partial = insertelement <2 x half> undef, half %add0, i32 0
  %built = insertelement <2 x half> %partial, half %mul1, i32 1
  %canon = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %built)
  ret <2 x half> %canon
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_noncanon1_v2f16:
|
|
|
|
; GFX9: v_add_f16_e32
|
|
|
|
; GFX9: v_pk_max
|
|
|
|
; Replace lane 0 of the incoming vector with an fadd result, leaving
; lane 1 as the raw argument, then canonicalize the vector.
define <2 x half> @v_test_canonicalize_build_vector_noncanon1_v2f16(<2 x half> %vec) {
  %elt0 = extractelement <2 x half> %vec, i32 0
  %add0 = fadd half %elt0, 1.0
  %merged = insertelement <2 x half> %vec, half %add0, i32 0
  %canon = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %merged)
  ret <2 x half> %canon
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_noncanon0_v2f16:
|
|
|
|
; GFX9: v_add_f16_sdwa
|
|
|
|
; GFX9: v_pk_max
|
|
|
|
; Replace lane 1 of the incoming vector with an fadd result, leaving
; lane 0 as the raw argument, then canonicalize the vector.
define <2 x half> @v_test_canonicalize_build_vector_noncanon0_v2f16(<2 x half> %vec) {
  %elt1 = extractelement <2 x half> %vec, i32 1
  %add1 = fadd half %elt1, 1.0
  %merged = insertelement <2 x half> %vec, half %add1, i32 1
  %canon = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %merged)
  ret <2 x half> %canon
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_extract_element_v2f16:
|
|
|
|
; GFX9: s_waitcnt
|
2018-08-16 05:34:06 +08:00
|
|
|
; GFX9-NEXT: v_mul_f16_e32 v0, 4.0, v0
|
2018-08-07 06:45:51 +08:00
|
|
|
; GFX9-NEXT: s_setpc_b64
|
|
|
|
; Multiply both lanes by 4.0, extract lane 0, and canonicalize the scalar.
define half @v_test_canonicalize_extract_element_v2f16(<2 x half> %vec) {
  %scaled = fmul <2 x half> %vec, <half 4.0, half 4.0>
  %lane0 = extractelement <2 x half> %scaled, i32 0
  %canon = call half @llvm.canonicalize.f16(half %lane0)
  ret half %canon
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_v2f16:
|
|
|
|
; GFX9: v_mul_f16_e32
|
[AMDGPU] Fix for vector element insertion
Summary:
Incorrect code was generated when lowering insertelement operations
for vectors with 8 or 16 bit elements. The value being inserted was
not adjusted for the position of the element within the 32 bit word
and so only the low element within each 32 bit word could receive
the intended value.
Fixed by simply replicating the value to each element of a
congruent vector before the mask and or operation used to
update the intended element.
A number of affected LIT tests have been updated appropriately.
before the mask & or into the intended
Reviewers: arsenm, nhaehnle
Reviewed By: arsenm
Subscribers: llvm-commits, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57588
llvm-svn: 352885
2019-02-02 00:51:09 +08:00
|
|
|
; GFX9: v_pk_mul_f16
|
2018-08-07 06:45:51 +08:00
|
|
|
; GFX9-NOT: v_max
|
|
|
|
; GFX9-NOT: v_pk_max
|
|
|
|
; Insert an fmul result into an fmul'd vector at a variable index, then
; canonicalize. Both the vector and the inserted scalar are FP-op results.
define <2 x half> @v_test_canonicalize_insertelement_v2f16(<2 x half> %vec, half %val, i32 %idx) {
  %scaled.vec = fmul <2 x half> %vec, <half 4.0, half 4.0>
  %scaled.val = fmul half %val, 8.0
  %merged = insertelement <2 x half> %scaled.vec, half %scaled.val, i32 %idx
  %canon = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %merged)
  ret <2 x half> %canon
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_noncanon_vec_v2f16:
|
|
|
|
; GFX9: v_mul_f16
|
|
|
|
; GFX9: v_pk_max_f16 v0, v0, v0
|
|
|
|
; GFX9-NEXT: s_setpc_b64
|
|
|
|
; Insert an fmul result into the raw argument vector at a variable
; index, then canonicalize. Only the inserted scalar is an FP-op result.
define <2 x half> @v_test_canonicalize_insertelement_noncanon_vec_v2f16(<2 x half> %vec, half %val, i32 %idx) {
  %scaled.val = fmul half %val, 8.0
  %merged = insertelement <2 x half> %vec, half %scaled.val, i32 %idx
  %canon = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %merged)
  ret <2 x half> %canon
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_noncanon_insval_v2f16:
|
|
|
|
; GFX9: v_pk_mul_f16
|
|
|
|
; GFX9: v_pk_max_f16 v0, v0, v0
|
|
|
|
; GFX9-NEXT: s_setpc_b64
|
|
|
|
; Insert the raw scalar argument into an fmul'd vector at a variable
; index, then canonicalize. Only the vector is an FP-op result.
define <2 x half> @v_test_canonicalize_insertelement_noncanon_insval_v2f16(<2 x half> %vec, half %val, i32 %idx) {
  %scaled.vec = fmul <2 x half> %vec, <half 4.0, half 4.0>
  %merged = insertelement <2 x half> %scaled.vec, half %val, i32 %idx
  %canon = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %merged)
  ret <2 x half> %canon
}
|
|
|
|
|
2018-08-07 07:01:31 +08:00
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_cvt_pkrtz:
|
|
|
|
; GCN: s_waitcnt
|
|
|
|
; GCN-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v1
|
|
|
|
; GCN-NEXT: s_setpc_b64
|
|
|
|
; Canonicalize the <2 x half> produced by the llvm.amdgcn.cvt.pkrtz intrinsic.
define <2 x half> @v_test_canonicalize_cvt_pkrtz(float %a, float %b) {
  %packed = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b)
  %canon = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %packed)
  ret <2 x half> %canon
}
|
|
|
|
|
2018-08-11 03:20:17 +08:00
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_cubeid:
|
|
|
|
; GCN: s_waitcnt
|
|
|
|
; GCN-NEXT: v_cubeid_f32 v0, v0, v1, v2
|
|
|
|
; GCN-NEXT: s_setpc_b64
|
|
|
|
; Canonicalize the float produced by the llvm.amdgcn.cubeid intrinsic.
define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
  %face = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
  %canon = call float @llvm.canonicalize.f32(float %face)
  ret float %canon
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}v_test_canonicalize_frexp_mant:
|
|
|
|
; GCN: s_waitcnt
|
|
|
|
; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
|
|
|
|
; GCN-NEXT: s_setpc_b64
|
|
|
|
; Canonicalize the float produced by the llvm.amdgcn.frexp.mant intrinsic.
define float @v_test_canonicalize_frexp_mant(float %a) {
  %mant = call float @llvm.amdgcn.frexp.mant.f32(float %a)
  %canon = call float @llvm.canonicalize.f32(float %mant)
  ret float %canon
}
|
|
|
|
|
Fix CodeGen/AMDGPU/fcanonicalize-elimination.ll on FreeBSD 11.0
Summary:
On FreeBSD11.0 the FileCheck NOT string "1.0" will be matched by
`.amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802"` at the end of the
file. Add a CHECK for that directive to avoid failing the test.
Reviewers: rampitec, kzhuravl
Reviewed By: rampitec, kzhuravl
Subscribers: emaste, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits, krytarowski
Differential Revision: https://reviews.llvm.org/D39306
llvm-svn: 316616
2017-10-26 05:44:21 +08:00
|
|
|
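; Avoid failing the test on FreeBSD11.0, where a NOT check on the string 1.0
; (spelled out here so FileCheck does not parse this comment as a directive)
; would otherwise match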
|
|
|
|
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
|
|
|
|
; CHECK: .amd_amdgpu_isa
|
|
|
|
|
2017-07-13 05:20:28 +08:00
|
|
|
; Canonicalize intrinsics, one per tested type.
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0

; Generic floating-point intrinsics.
declare float @llvm.copysign.f32(float, float) #0
declare float @llvm.sqrt.f32(float) #0
declare float @llvm.ceil.f32(float) #0
declare float @llvm.floor.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.sin.f32(float) #0
declare float @llvm.cos.f32(float) #0
declare half @llvm.sin.f16(half) #0
declare half @llvm.cos.f16(half) #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0

; AMDGPU target intrinsics.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.amdgcn.fmul.legacy(float, float) #0
declare float @llvm.amdgcn.fmad.ftz.f32(float, float, float) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
declare float @llvm.amdgcn.cubeid(float, float, float) #0
declare float @llvm.amdgcn.frexp.mant.f32(float) #0

; #0 is attached to every intrinsic declaration above.
attributes #0 = { nounwind readnone }
; #1 sets the no-nans-fp-math function attribute.
attributes #1 = { "no-nans-fp-math"="true" }
; #2 turns off the fp64-fp16-denormals target feature.
attributes #2 = { "target-features"="-fp64-fp16-denormals" }
|