llvm-project/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s

; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600

; FUNC-LABEL: {{^}}s_sint_to_fp_i64_to_f16:
define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 {
  %result = sitofp i64 %in to half
  store half %result, half addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_sint_to_fp_i64_to_f16:
; GCN: {{buffer|flat}}_load_dwordx2

; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_xor_b32

; GCN: v_ffbh_u32
; GCN: v_ffbh_u32
; GCN: v_cndmask
; GCN: v_cndmask

; GCN-DAG: v_cmp_eq_u64
; GCN-DAG: v_cmp_gt_u64

; GCN: v_cndmask_b32_e64 [[SIGN_SEL:v[0-9]+]], v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[SIGN_SEL_F16:v[0-9]+]], [[SIGN_SEL]]
; GCN: {{buffer|flat}}_store_short {{.*}}[[SIGN_SEL_F16]]
define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %in.gep
  %result = sitofp i64 %val to half
  store half %result, half addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: {{^}}s_sint_to_fp_i64_to_f32:
define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
  %result = sitofp i64 %in to float
  store float %result, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_sint_to_fp_i64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2

; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_xor_b32

; GCN: v_ffbh_u32
; GCN: v_ffbh_u32
; GCN: v_cndmask
; GCN: v_cndmask

; GCN-DAG: v_cmp_eq_u64
; GCN-DAG: v_cmp_gt_u64

; GCN: v_cndmask_b32_e64 [[SIGN_SEL:v[0-9]+]], v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: {{buffer|flat}}_store_dword {{.*}}[[SIGN_SEL]]
define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %val = load i64, i64 addrspace(1)* %in.gep
  %result = sitofp i64 %val to float
  store float %result, float addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64_to_v2f32:
; GCN-NOT: v_and_b32_e32 v{{[0-9]+}}, -1,
define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{
  %result = sitofp <2 x i64> %in to <2 x float>
  store <2 x float> %result, <2 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64_to_v4f32:
define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
  %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
  %result = sitofp <4 x i64> %value to <4 x float>
  store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
  ret void
}

; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64_to_v2f16:
; GCN-NOT: v_and_b32_e32 v{{[0-9]+}}, -1,
define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{
  %result = sitofp <2 x i64> %in to <2 x half>
  store <2 x half> %result, <2 x half> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64_to_v4f16:
define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
  %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid
  %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
  %result = sitofp <4 x i64> %value to <4 x half>
  store <4 x half> %result, <4 x half> addrspace(1)* %out.gep
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s`
Enable FeatureFlatForGlobal on Volcanic Islands This switches to the workaround that HSA defaults to for the mesa path. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> llvm-svn: 292982 2017-01-25 06:02:15 +08:00			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s`
AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00
			`; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600`

[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`; FUNC-LABEL: {{^}}s_sint_to_fp_i64_to_f16:`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 %in) #0 {`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`%result = sitofp i64 %in to half`
			`store half %result, half addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}v_sint_to_fp_i64_to_f16:`
			`; GCN: {{buffer\|flat}}_load_dwordx2`

			`; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}`
			`; GCN: v_xor_b32`

			`; GCN: v_ffbh_u32`
			`; GCN: v_ffbh_u32`
			`; GCN: v_cndmask`
			`; GCN: v_cndmask`

			`; GCN-DAG: v_cmp_eq_u64`
[AMDGPU] Eliminate SGPR to VGPR copy when possible SGPRs are generally cheaper, so try to use them over VGPRs. Differential Revision: https://reviews.llvm.org/D34130 llvm-svn: 305815 2017-06-21 02:32:42 +08:00			`; GCN-DAG: v_cmp_gt_u64`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00
[AMDGPU] Allow abs/neg source modifiers on v_cndmask_b32 Summary: D59191 added support for these modifiers in the assembler and disassembler. This patch just teaches instruction selection that it can use them. Reviewers: arsenm, tstellar Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64497 llvm-svn: 365640 2019-07-10 22:53:47 +08:00			`; GCN: v_cndmask_b32_e64 [[SIGN_SEL:v[0-9]+]], v{{[0-9]+}}, -v{{[0-9]+}}`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`; GCN: v_cvt_f16_f32_e32 [[SIGN_SEL_F16:v[0-9]+]], [[SIGN_SEL]]`
			`; GCN: {{buffer\|flat}}_store_short {{.*}}[[SIGN_SEL_F16]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`%tid = call i32 @llvm.amdgcn.workitem.id.x()`
			`%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid`
			`%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid`
			`%val = load i64, i64 addrspace(1)* %in.gep`
			`%result = sitofp i64 %val to half`
			`store half %result, half addrspace(1)* %out.gep`
			`ret void`
			`}`

AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00			`; FUNC-LABEL: {{^}}s_sint_to_fp_i64_to_f32:`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {`
AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00			`%result = sitofp i64 %in to float`
			`store float %result, float addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}v_sint_to_fp_i64_to_f32:`
			`; GCN: {{buffer\|flat}}_load_dwordx2`

AMDGPU: Reduce 64-bit SRAs llvm-svn: 258096 2016-01-19 06:09:04 +08:00			`; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}`
AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00			`; GCN: v_xor_b32`

			`; GCN: v_ffbh_u32`
			`; GCN: v_ffbh_u32`
			`; GCN: v_cndmask`
			`; GCN: v_cndmask`

AMDGPU: Use unsigned compare for eq/ne For some reason there are both of these available, except for scalar 64-bit compares which only has u64. I'm not sure why there are both (I'm guessing it's for the one bit inputs we don't use), but for consistency always using the unsigned one. llvm-svn: 282832 2016-09-30 09:50:20 +08:00			`; GCN-DAG: v_cmp_eq_u64`
[AMDGPU] Eliminate SGPR to VGPR copy when possible SGPRs are generally cheaper, so try to use them over VGPRs. Differential Revision: https://reviews.llvm.org/D34130 llvm-svn: 305815 2017-06-21 02:32:42 +08:00			`; GCN-DAG: v_cmp_gt_u64`
AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00
[AMDGPU] Allow abs/neg source modifiers on v_cndmask_b32 Summary: D59191 added support for these modifiers in the assembler and disassembler. This patch just teaches instruction selection that it can use them. Reviewers: arsenm, tstellar Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64497 llvm-svn: 365640 2019-07-10 22:53:47 +08:00			`; GCN: v_cndmask_b32_e64 [[SIGN_SEL:v[0-9]+]], v{{[0-9]+}}, -v{{[0-9]+}}`
[AMDGPU] Assembler: Swap operands of flat_store instructions to match AMD assembler Historically, AMD internal sp3 assembler has flat_store* addr, data format. To match existing code and to enable reuse, change LLVM definitions to match. Also update MC and CodeGen tests. Differential Revision: http://reviews.llvm.org/D16927 Patch by: Nikolay Haustov llvm-svn: 260694 2016-02-13 01:57:54 +08:00			`; GCN: {{buffer\|flat}}_store_dword {{.*}}[[SIGN_SEL]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`%tid = call i32 @llvm.amdgcn.workitem.id.x()`
AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00			`%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid`
			`%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid`
			`%val = load i64, i64 addrspace(1)* %in.gep`
			`%result = sitofp i64 %val to float`
			`store float %result, float addrspace(1)* %out.gep`
			`ret void`
			`}`

[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64_to_v2f32:`
AMDGPU: Improve splitting 64-bit bit ops by constants This addresses a TODO to handle operations besides and. This also starts eliminating no-op operations with a constant that can emerge later. llvm-svn: 281488 2016-09-14 23:19:03 +08:00			`; GCN-NOT: v_and_b32_e32 v{{[0-9]+}}, -1,`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{`
AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00			`%result = sitofp <2 x i64> %in to <2 x float>`
			`store <2 x float> %result, <2 x float> addrspace(1)* %out`
			`ret void`
			`}`

[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64_to_v4f32:`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`%tid = call i32 @llvm.amdgcn.workitem.id.x()`
AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00			`%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid`
			`%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid`
			`%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep`
			`%result = sitofp <4 x i64> %value to <4 x float>`
			`store <4 x float> %result, <4 x float> addrspace(1)* %out.gep`
			`ret void`
			`}`

[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64_to_v2f16:`
			`; GCN-NOT: v_and_b32_e32 v{{[0-9]+}}, -1,`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)* %out, <2 x i64> %in) #0{`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`%result = sitofp <2 x i64> %in to <2 x half>`
			`store <2 x half> %result, <2 x half> addrspace(1)* %out`
			`ret void`
			`}`

			`; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64_to_v4f16:`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`%tid = call i32 @llvm.amdgcn.workitem.id.x()`
			`%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid`
			`%out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid`
			`%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep`
			`%result = sitofp <4 x i64> %value to <4 x half>`
			`store <4 x half> %result, <4 x half> addrspace(1)* %out.gep`
			`ret void`
			`}`

AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`declare i32 @llvm.amdgcn.workitem.id.x() #1`
AMDGPU: Implement {{s\|u}}int_to_fp i64 -> f32 The old lowering for uint_to_fp failed opencl conformance. It might be OK for fast math mode, but I'm not sure. llvm-svn: 257393 2016-01-12 06:01:48 +08:00
			`attributes #0 = { nounwind }`
			`attributes #1 = { nounwind readnone }`