llvm-project/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll

; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Single-use case. One half multiply feeds an add of the constant 10.0.
; The fiji RUN line expects the pair to be selected as one v_madak_f16 whose
; trailing constant operand is 0x4900, which is the IEEE half bit pattern of
; 10.0 (sign 0, biased exponent 18, mantissa 0x100). The first RUN line only
; pins the two 16-bit loads and the end of the program for this function.
; GCN-LABEL: {{^}}madak_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; VI:  v_madak_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], 0x4900{{$}}
; VI:  buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @madak_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  ; Load order matters here, the checks bind A_F16 to the first load and
  ; B_F16 to the second.
  %lhs = load half, half addrspace(1)* %a
  %rhs = load half, half addrspace(1)* %b

  ; product + 10.0, the shape the madak pattern is expected to cover
  %prod = fmul half %lhs, %rhs
  %sum = fadd half %prod, 10.0

  store half %sum, half addrspace(1)* %r
  ret void
}

; Two-use case. The constant 10.0 is added to two different products that
; both use the value loaded from %a. The checks expect a v_mad followed by a
; v_mac, in f32 form on the first RUN line and in f16 form on the fiji RUN
; line.
; NOTE(review): presumably madak is not expected here because the constant
; has two users and ends up in a register - confirm against the selector.
; GCN-LABEL: {{^}}madak_f16_use_2
; SI:  v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @madak_f16_use_2(
    half addrspace(1)* %r0,
    half addrspace(1)* %r1,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %a.ld = load half, half addrspace(1)* %a
  %b.ld = load half, half addrspace(1)* %b
  %c.ld = load half, half addrspace(1)* %c

  ; Two products sharing the %a operand, each offset by the same constant.
  %ab = fmul half %a.ld, %b.ld
  %ac = fmul half %a.ld, %c.ld
  %ab.plus.k = fadd half %ab, 10.0
  %ac.plus.k = fadd half %ac, 10.0

  store half %ab.plus.k, half addrspace(1)* %r0
  store half %ac.plus.k, half addrspace(1)* %r1
  ret void
}
AMDGPU: Combine fp16/fp64 subtarget features The same control register controls both, and are set to the same defaults. Keep the old names around as aliases. llvm-svn: 292837 2017-01-24 06:31:03 +08:00			`; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI %s`
Enable FeatureFlatForGlobal on Volcanic Islands This switches to the workaround that HSA defaults to for the mesa path. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> llvm-svn: 292982 2017-01-25 06:02:15 +08:00			`; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI %s`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00
			`; GCN-LABEL: {{^}}madak_f16`
			`; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]`
			`; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]`
[AMDGPU][MC] Corrected v_madak/madmk to avoid printing "_e32" in disassembler output See bug 32927: https://bugs.llvm.org//show_bug.cgi?id=32927 Reviewers: vpykhtin, artem.tamazov, arsenm Differential Revision: https://reviews.llvm.org/D32913 llvm-svn: 302648 2017-05-10 21:00:28 +08:00			`; VI: v_madak_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], 0x4900{{$}}`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`; VI: buffer_store_short v[[R_F16]]`
			`; GCN: s_endpgm`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @madak_f16(`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`half addrspace(1)* %r,`
			`half addrspace(1)* %a,`
			`half addrspace(1)* %b) {`
			`entry:`
			`%a.val = load half, half addrspace(1)* %a`
			`%b.val = load half, half addrspace(1)* %b`

			`%t.val = fmul half %a.val, %b.val`
			`%r.val = fadd half %t.val, 10.0`

			`store half %r.val, half addrspace(1)* %r`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}madak_f16_use_2`
			`; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}`
			`; SI: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}`
			`; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}`
			`; VI: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}`
			`; GCN: s_endpgm`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @madak_f16_use_2(`
[AMDGPU] Add f16 support (VI+) Differential Revision: https://reviews.llvm.org/D25975 llvm-svn: 286753 2016-11-13 15:01:11 +08:00			`half addrspace(1)* %r0,`
			`half addrspace(1)* %r1,`
			`half addrspace(1)* %a,`
			`half addrspace(1)* %b,`
			`half addrspace(1)* %c) {`
			`entry:`
			`%a.val = load half, half addrspace(1)* %a`
			`%b.val = load half, half addrspace(1)* %b`
			`%c.val = load half, half addrspace(1)* %c`

			`%t0.val = fmul half %a.val, %b.val`
			`%t1.val = fmul half %a.val, %c.val`
			`%r0.val = fadd half %t0.val, 10.0`
			`%r1.val = fadd half %t1.val, 10.0`

			`store half %r0.val, half addrspace(1)* %r0`
			`store half %r1.val, half addrspace(1)* %r1`
			`ret void`
			`}`