diff --git a/llvm/test/CodeGen/AMDGPU/address-space.ll b/llvm/test/CodeGen/AMDGPU/address-space.ll deleted file mode 100644 index de206f18991f..000000000000 --- a/llvm/test/CodeGen/AMDGPU/address-space.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s - -; Test that codegenprepare understands address space sizes - -%struct.foo = type { [3 x float], [3 x float] } - -; CHECK-LABEL: {{^}}do_as_ptr_calcs: -; CHECK: s_load_dword [[SREG1:s[0-9]+]], -; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] -; CHECK-DAG: ds_read2_b32 v[{{[0-9+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5 -define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { -entry: - %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 - %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2 - br label %bb32 - -bb32: - %a = load float, float addrspace(3)* %x, align 4 - %b = load float, float addrspace(3)* %y, align 4 - %cmp = fcmp one float %a, %b - br i1 %cmp, label %bb34, label %bb33 - -bb33: - unreachable - -bb34: - unreachable -} - - diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index a0857273e3e3..916d667ec492 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -447,6 +447,34 @@ done: ret void } +%struct.foo = type { [3 x float], [3 x float] } + +; OPT-LABEL: @sink_ds_address( +; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64 + +; GCN-LABEL: {{^}}sink_ds_address: +; GCN: s_load_dword [[SREG1:s[0-9]+]], +; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] +; GCN-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VREG1]] offset0:3 offset1:5 +define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind { +entry: + %x = 
getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 + %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2 + br label %bb32 + +bb32: + %a = load float, float addrspace(3)* %x, align 4 + %b = load float, float addrspace(3)* %y, align 4 + %cmp = fcmp one float %a, %b + br i1 %cmp, label %bb34, label %bb33 + +bb33: + unreachable + +bb34: + unreachable +} + declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll new file mode 100644 index 000000000000..ce0881c329be --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}v_safe_fsqrt_f64: +; GCN: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +define void @v_safe_fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #1 { + %r0 = load double, double addrspace(1)* %in + %r1 = call double @llvm.sqrt.f64(double %r0) + store double %r1, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_unsafe_fsqrt_f64: +; GCN: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +define void @v_unsafe_fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #2 { + %r0 = load double, double addrspace(1)* %in + %r1 = call double @llvm.sqrt.f64(double %r0) + store double %r1, double addrspace(1)* %out + ret void +} + +declare double @llvm.sqrt.f64(double %Val) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind "unsafe-fp-math"="false" } +attributes #2 = { nounwind "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.ll index 04101346cdf9..f98cac6ade3a 100644 --- 
a/llvm/test/CodeGen/AMDGPU/fsqrt.ll +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.ll @@ -1,29 +1,143 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + ; Run with unsafe-fp-math to make sure nothing tries to turn this into 1 / rsqrt(x) -; CHECK: {{^}}fsqrt_f32: -; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}} - -define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %r0 = load float, float addrspace(1)* %in - %r1 = call float @llvm.sqrt.f32(float %r0) - store float %r1, float addrspace(1)* %out - ret void +; FUNC-LABEL: {{^}}v_safe_fsqrt_f32: +; GCN: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}} +define void @v_safe_fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 { + %r0 = load float, float addrspace(1)* %in + %r1 = call float @llvm.sqrt.f32(float %r0) + store float %r1, float addrspace(1)* %out + ret void } -; CHECK: {{^}}fsqrt_f64: -; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} - -define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) { - %r0 = load double, double addrspace(1)* %in - %r1 = call double @llvm.sqrt.f64(double %r0) - store double %r1, double addrspace(1)* %out - ret void +; FUNC-LABEL: {{^}}v_unsafe_fsqrt_f32: +; GCN: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}} +define void @v_unsafe_fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #2 { + %r0 = 
load float, float addrspace(1)* %in + %r1 = call float @llvm.sqrt.f32(float %r0) + store float %r1, float addrspace(1)* %out + ret void } -declare float @llvm.sqrt.f32(float %Val) -declare double @llvm.sqrt.f64(double %Val) + +; FUNC-LABEL: {{^}}s_sqrt_f32: +; GCN: v_sqrt_f32_e32 + +; R600: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z +; R600: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS +define void @s_sqrt_f32(float addrspace(1)* %out, float %in) #1 { +entry: + %fdiv = call float @llvm.sqrt.f32(float %in) + store float %fdiv, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_sqrt_v2f32: +; GCN: v_sqrt_f32_e32 +; GCN: v_sqrt_f32_e32 + +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS +define void @s_sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 { +entry: + %fdiv = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_sqrt_v4f32: +; GCN: v_sqrt_f32_e32 +; GCN: v_sqrt_f32_e32 +; GCN: v_sqrt_f32_e32 +; GCN: v_sqrt_f32_e32 + +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Z, PS +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].W +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS +; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X +; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS +define void @s_sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 { +entry: + %fdiv = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) + store <4 x float> %fdiv, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}elim_redun_check_neg0: +; GCN: v_sqrt_f32_e32 +; GCN-NOT: v_cndmask +define void @elim_redun_check_neg0(float addrspace(1)* %out, float %in) #1 { +entry: + %sqrt = call float @llvm.sqrt.f32(float %in) + %cmp = fcmp olt float %in, -0.000000e+00 + %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt + store float %res, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}elim_redun_check_pos0: +; GCN: v_sqrt_f32_e32 +; GCN-NOT: v_cndmask +define void @elim_redun_check_pos0(float addrspace(1)* %out, float %in) #1 { +entry: + %sqrt = call float @llvm.sqrt.f32(float %in) + %cmp = fcmp olt float %in, 0.000000e+00 + %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt + store float %res, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}elim_redun_check_ult: +; GCN: v_sqrt_f32_e32 +; GCN-NOT: v_cndmask +define void @elim_redun_check_ult(float addrspace(1)* %out, float %in) #1 { +entry: + %sqrt = call float @llvm.sqrt.f32(float %in) + %cmp = fcmp ult float %in, -0.000000e+00 + %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt + store float %res, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}elim_redun_check_v2: +; GCN: v_sqrt_f32_e32 +; GCN: v_sqrt_f32_e32 +; GCN-NOT: v_cndmask +define void @elim_redun_check_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 { +entry: + %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) + %cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00> + %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt + store <2 x float> %res, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}elim_redun_check_v2_ult: +; GCN: v_sqrt_f32_e32 +; GCN: v_sqrt_f32_e32 +; GCN-NOT: v_cndmask +define void @elim_redun_check_v2_ult(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 { +entry: + %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) + %cmp = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00> + %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt + store <2 x float> %res, <2 x float> 
addrspace(1)* %out + ret void +} + +declare float @llvm.sqrt.f32(float %in) #0 +declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) #0 +declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind "unsafe-fp-math"="false" } +attributes #2 = { nounwind "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.ll b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.ll deleted file mode 100644 index c8ac196e659a..000000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.ll +++ /dev/null @@ -1,117 +0,0 @@ -; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600 -; RUN: llc < %s -march=amdgcn --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI -; RUN: llc < %s -march=amdgcn --mcpu=tonga -verify-machineinstrs| FileCheck %s --check-prefix=SI - -; R600-LABEL: {{^}}sqrt_f32: -; R600: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z -; R600: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS -; SI-LABEL: {{^}}sqrt_f32: -; SI: v_sqrt_f32_e32 -define void @sqrt_f32(float addrspace(1)* %out, float %in) { -entry: - %0 = call float @llvm.sqrt.f32(float %in) - store float %0, float addrspace(1)* %out - ret void -} - -; R600-LABEL: {{^}}sqrt_v2f32: -; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W -; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS -; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X -; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS -; SI-LABEL: {{^}}sqrt_v2f32: -; SI: v_sqrt_f32_e32 -; SI: v_sqrt_f32_e32 -define void @sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { -entry: - %0 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out - ret void -} - -; R600-LABEL: {{^}}sqrt_v4f32: -; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y -; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS -; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z -; R600-DAG: 
MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Z, PS -; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].W -; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS -; R600-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X -; R600-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS -; SI-LABEL: {{^}}sqrt_v4f32: -; SI: v_sqrt_f32_e32 -; SI: v_sqrt_f32_e32 -; SI: v_sqrt_f32_e32 -; SI: v_sqrt_f32_e32 -define void @sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { -entry: - %0 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out - ret void -} - -; SI-LABEL: {{^}}elim_redun_check_neg0: -; SI: v_sqrt_f32_e32 -; SI-NOT: v_cndmask -define void @elim_redun_check_neg0(float addrspace(1)* %out, float %in) { -entry: - %sqrt = call float @llvm.sqrt.f32(float %in) - %cmp = fcmp olt float %in, -0.000000e+00 - %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt - store float %res, float addrspace(1)* %out - ret void -} - -; SI-LABEL: {{^}}elim_redun_check_pos0: -; SI: v_sqrt_f32_e32 -; SI-NOT: v_cndmask -define void @elim_redun_check_pos0(float addrspace(1)* %out, float %in) { -entry: - %sqrt = call float @llvm.sqrt.f32(float %in) - %cmp = fcmp olt float %in, 0.000000e+00 - %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt - store float %res, float addrspace(1)* %out - ret void -} - -; SI-LABEL: {{^}}elim_redun_check_ult: -; SI: v_sqrt_f32_e32 -; SI-NOT: v_cndmask -define void @elim_redun_check_ult(float addrspace(1)* %out, float %in) { -entry: - %sqrt = call float @llvm.sqrt.f32(float %in) - %cmp = fcmp ult float %in, -0.000000e+00 - %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt - store float %res, float addrspace(1)* %out - ret void -} - -; SI-LABEL: {{^}}elim_redun_check_v2: -; SI: v_sqrt_f32_e32 -; SI: v_sqrt_f32_e32 -; SI-NOT: v_cndmask -define void @elim_redun_check_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) { -entry: - %sqrt = call <2 x float> 
@llvm.sqrt.v2f32(<2 x float> %in) - %cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00> - %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt - store <2 x float> %res, <2 x float> addrspace(1)* %out - ret void -} - -; SI-LABEL: {{^}}elim_redun_check_v2_ult -; SI: v_sqrt_f32_e32 -; SI: v_sqrt_f32_e32 -; SI-NOT: v_cndmask -define void @elim_redun_check_v2_ult(<2 x float> addrspace(1)* %out, <2 x float> %in) { -entry: - %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) - %cmp = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00> - %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt - store <2 x float> %res, <2 x float> addrspace(1)* %out - ret void -} - -declare float @llvm.sqrt.f32(float %in) -declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) -declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)