llvm-project/llvm/test/CodeGen/AMDGPU/llvm.round.ll

; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}round_f32:
; SI-DAG: s_load_dword [[SX:s[0-9]+]]
; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff
; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
; SI: v_cmp_le_f32_e64 vcc, 0.5, |[[SUB]]|
; SI: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[VX]]
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
; SI: buffer_store_dword [[RESULT]]

; R600: TRUNC {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
; R600-DAG: ADD  {{.*}},
; R600-DAG: BFI_INT
; R600-DAG: SETGE
; R600-DAG: CNDE
; R600-DAG: ADD
; Scalar f32 test body. Passes %x through the llvm.round.f32 intrinsic and
; stores the returned value through the address-space-1 pointer %out.
define void @round_f32(float addrspace(1)* %out, float %x) #0 {
  %rounded = call float @llvm.round.f32(float %x) #1
  store float %rounded, float addrspace(1)* %out
  ret void
}

; The vector tests are really difficult to verify, since it can be hard to
; predict how the scheduler will order the instructions.  We already have
; a test for the scalar case, so the vector tests just check that the
; compiler doesn't crash.

; FUNC-LABEL: {{^}}round_v2f32:
; SI: s_endpgm
; R600: CF_END
; Two-element vector variant. Feeds %in to llvm.round.v2f32 and stores the
; whole result vector through %out.
define void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 {
  %rounded = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1
  store <2 x float> %rounded, <2 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}round_v4f32:
; SI: s_endpgm
; R600: CF_END
; Four-element vector variant. Feeds %in to llvm.round.v4f32 and stores the
; whole result vector through %out.
define void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 {
  %rounded = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1
  store <4 x float> %rounded, <4 x float> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}round_v8f32:
; SI: s_endpgm
; R600: CF_END
; Eight-element vector variant. Feeds %in to llvm.round.v8f32 and stores the
; whole result vector through %out.
define void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 {
  %rounded = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1
  store <8 x float> %rounded, <8 x float> addrspace(1)* %out
  ret void
}

; Declarations of the round intrinsic, one overload per element count used by
; the functions in this file (scalar, 2-, 4- and 8-wide float).
declare float @llvm.round.f32(float) #1
declare <2 x float> @llvm.round.v2f32(<2 x float>) #1
declare <4 x float> @llvm.round.v4f32(<4 x float>) #1
declare <8 x float> @llvm.round.v8f32(<8 x float>) #1

; Attribute group #0 is attached to the test function definitions.
attributes #0 = { nounwind }
; Attribute group #1 is attached to the intrinsic declarations and to their
; call sites.
attributes #1 = { nounwind readnone }
R600/SI: Custom lower fround This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. llvm-svn: 226682 2015-01-22 02:18:25 +08:00			`; RUN: llc -march=amdgcn -mcpu=SI < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s`
R600/SI: Enable a lot of existing tests for VI (squashed commits) This is a union of these commits: * R600/SI: Enable more tests for VI which need no changes * R600/SI: Enable V_BCNT tests for VI Differences: - v_bcnt_..._e32 -> _e64 - s_load_dword* inline offset is in bytes instead of dwords * R600/SI: Enable all tests for VI which use S_LOAD_DWORD The inline offset is changed from dwords to bytes. * R600/SI: Enable LDS tests for VI Differences: - the s_load_dword inline offset changed from dwords to bytes - the tests checked very little on CI, so they have been fixed to check all instructions that "SI" checked * R600/SI: Enable lshr tests for VI * R600/SI: Fix divrem64 tests - "v_lshl_64" was missing "b" before "64" - added VI-NOT checks * R600/SI: Enable the SI.tid test for VI * R600/SI: Enable the frem test for VI Also, the frem_f64 checking is added for CI-VI. * R600/SI: Add VI tests for rsq.clamped llvm-svn: 228830 2015-02-11 22:26:46 +08:00			`; RUN: llc -march=amdgcn -mcpu=tonga < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s`
R600/SI: Custom lower fround This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. llvm-svn: 226682 2015-01-22 02:18:25 +08:00			`; RUN: llc -march=r600 -mcpu=redwood < %s \| FileCheck -check-prefix=R600 -check-prefix=FUNC %s`

			`; FUNC-LABEL: {{^}}round_f32:`
			`; SI-DAG: s_load_dword [[SX:s[0-9]+]]`
			`; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff`
			`; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]`
			`; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]`
R600/SI: Define a schedule model and enable the generic machine scheduler The schedule model is not complete yet, and could be improved. llvm-svn: 227461 2015-01-30 00:55:25 +08:00			`; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]`
			`; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]`
AMDGPU/SI: Add support for shrinking v_cndmask_b32_e32 instructions Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11061 llvm-svn: 242146 2015-07-14 22:15:03 +08:00			`; SI: v_cmp_le_f32_e64 vcc, 0.5, \|[[SUB]]\|`
			`; SI: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[VX]]`
R600/SI: Custom lower fround This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. llvm-svn: 226682 2015-01-22 02:18:25 +08:00			`; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]`
			`; SI: buffer_store_dword [[RESULT]]`

			`; R600: TRUNC {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]`
			`; R600-DAG: ADD {{.*}},`
			`; R600-DAG: BFI_INT`
			`; R600-DAG: SETGE`
			`; R600-DAG: CNDE`
			`; R600-DAG: ADD`
			`define void @round_f32(float addrspace(1)* %out, float %x) #0 {`
			`%result = call float @llvm.round.f32(float %x) #1`
			`store float %result, float addrspace(1)* %out`
R600: Add support for ISD::FROUND NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195878 2013-11-28 05:23:20 +08:00			`ret void`
			`}`

			`; The vector tests are really difficult to verify, since it can be hard to`
			`; predict how the scheduler will order the instructions. We already have`
			`; a test for the scalar case, so the vector tests just check that the`
			`; compiler doesn't crash.`

R600/SI: Custom lower fround This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. llvm-svn: 226682 2015-01-22 02:18:25 +08:00			`; FUNC-LABEL: {{^}}round_v2f32:`
			`; SI: s_endpgm`
R600: Add support for ISD::FROUND NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195878 2013-11-28 05:23:20 +08:00			`; R600: CF_END`
R600/SI: Custom lower fround This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. llvm-svn: 226682 2015-01-22 02:18:25 +08:00			`define void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 {`
			`%result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1`
			`store <2 x float> %result, <2 x float> addrspace(1)* %out`
R600: Add support for ISD::FROUND NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195878 2013-11-28 05:23:20 +08:00			`ret void`
			`}`

R600/SI: Custom lower fround This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. llvm-svn: 226682 2015-01-22 02:18:25 +08:00			`; FUNC-LABEL: {{^}}round_v4f32:`
			`; SI: s_endpgm`
R600: Add support for ISD::FROUND NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195878 2013-11-28 05:23:20 +08:00			`; R600: CF_END`
R600/SI: Custom lower fround This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. llvm-svn: 226682 2015-01-22 02:18:25 +08:00			`define void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 {`
			`%result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1`
			`store <4 x float> %result, <4 x float> addrspace(1)* %out`
R600: Add support for ISD::FROUND NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195878 2013-11-28 05:23:20 +08:00			`ret void`
			`}`

R600/SI: Custom lower fround This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. llvm-svn: 226682 2015-01-22 02:18:25 +08:00			`; FUNC-LABEL: {{^}}round_v8f32:`
			`; SI: s_endpgm`
			`; R600: CF_END`
			`define void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 {`
			`%result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1`
			`store <8 x float> %result, <8 x float> addrspace(1)* %out`
			`ret void`
			`}`

			`declare float @llvm.round.f32(float) #1`
			`declare <2 x float> @llvm.round.v2f32(<2 x float>) #1`
			`declare <4 x float> @llvm.round.v4f32(<4 x float>) #1`
			`declare <8 x float> @llvm.round.v8f32(<8 x float>) #1`

			`attributes #0 = { nounwind }`
			`attributes #1 = { nounwind readnone }`