; llvm-project/llvm/test/CodeGen/AMDGPU/v_mac.ll

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s

; Multiply-accumulate with all three inputs loaded into v registers.
; Three consecutive floats are read from %in (offsets 0, 4 and 8 bytes) and
; combined as a * b + c. The checks expect a v_mac_f32_e32 whose destination
; is the register that holds c, and a store of that same register.
; GCN-LABEL: {{^}}mac_vvv:
; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
; GCN: buffer_load_dword [[B:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4
; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]]
; GCN: buffer_store_dword [[C]]
define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2

  ; The loads are volatile; presumably this keeps them as three separate
  ; dword loads so each load check above can match its own instruction.
  %a = load volatile float, float addrspace(1)* %in
  %b = load volatile float, float addrspace(1)* %b_ptr
  %c = load volatile float, float addrspace(1)* %c_ptr

  ; a * b + c is the pattern expected to be selected as a mac.
  %tmp0 = fmul float %a, %b
  %tmp1 = fadd float %tmp0, %c
  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Multiply-add of a float kernel argument with two 0.5 constants
; (0.5 * %in + 0.5). The checks expect no v_mac_f32 and instead a v_mad_f32
; that takes an s register and both 0.5 values directly as source operands.
; The destination pattern accepts any v register number, so the check does
; not depend on the allocator picking a single-digit register.
; GCN-LABEL: {{^}}mad_inline_sgpr_inline:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, 0.5, 0.5
define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) #0 {
entry:
  %tmp0 = fmul float 0.5, %in
  %tmp1 = fadd float %tmp0, 0.5
  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Multiply-add whose addend %c is a float kernel argument, while a and b are
; loaded from %in. The checks expect no v_mac_f32 and a v_mad_f32 whose last
; source operand is an s register.
; GCN-LABEL: {{^}}mad_vvs:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
define void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) #0 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1

  %a = load float, float addrspace(1)* %in
  %b = load float, float addrspace(1)* %b_ptr

  ; The addend comes straight from the argument, not from memory.
  %tmp0 = fmul float %a, %b
  %tmp1 = fadd float %tmp0, %c
  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Multiply-accumulate where both multiplicands are the same float kernel
; argument (%a * %a) and the addend is loaded from %in. The checks expect
; the e64 encoding of v_mac_f32 with two s register sources.
; GCN-LABEL: {{^}}mac_ssv:
; GCN: v_mac_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) #0 {
entry:
  %c = load float, float addrspace(1)* %in

  %tmp0 = fmul float %a, %a
  %tmp1 = fadd float %tmp0, %c
  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Two multiply-adds that share the same addend %c (a * b + c and d * e + c).
; The checks expect the first to be a v_mad_f32 that only reads the addend
; register, and the second to be a v_mac_f32_e32 that uses that same register
; as its destination. The v_mad_f32 destination pattern accepts any v
; register number rather than only a single digit.
; GCN-LABEL: {{^}}mac_mad_same_add:
; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; GCN: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
define void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
  %d_ptr = getelementptr float, float addrspace(1)* %in, i32 3
  %e_ptr = getelementptr float, float addrspace(1)* %in, i32 4

  %a = load volatile float, float addrspace(1)* %in
  %b = load volatile float, float addrspace(1)* %b_ptr
  %c = load volatile float, float addrspace(1)* %c_ptr
  %d = load volatile float, float addrspace(1)* %d_ptr
  %e = load volatile float, float addrspace(1)* %e_ptr

  ; First user of the shared addend %c.
  %tmp0 = fmul float %a, %b
  %tmp1 = fadd float %tmp0, %c

  ; Second user of the shared addend %c.
  %tmp2 = fmul float %d, %e
  %tmp3 = fadd float %tmp2, %c

  ; Both results are stored, to out[0] and out[1].
  %out1 = getelementptr float, float addrspace(1)* %out, i32 1
  store float %tmp1, float addrspace(1)* %out
  store float %tmp3, float addrspace(1)* %out1
  ret void
}

; There is no advantage to using v_mac when one of the operands is negated
; and v_mad accepts more operand types.

; Multiply-add with the first multiplicand negated ((-a) * b + c).
; The checks expect no v_mac_f32 and a v_mad_f32 with a negated first source
; operand. The last operand is matched as a plain v register with any number
; of digits.
; GCN-LABEL: {{^}}mad_neg_src0:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @mad_neg_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2

  %a = load float, float addrspace(1)* %in
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

  ; Subtracting from -0.0 is the negation idiom used throughout this file.
  %neg_a = fsub float -0.0, %a
  %tmp0 = fmul float %neg_a, %b
  %tmp1 = fadd float %tmp0, %c

  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Multiply-add where the first multiplicand is 0.0 - a, compiled with
; attribute group #1 ("unsafe-fp-math"="true"). The checks expect the
; subtraction to be folded into a negated source operand of a v_mad_f32, with
; no v_mac_f32 emitted. The last operand is matched as a plain v register
; with any number of digits.
; GCN-LABEL: {{^}}unsafe_mad_sub0_src0:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @unsafe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2

  %a = load float, float addrspace(1)* %in
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

  ; Note the +0.0 constant here, unlike the -0.0 used by mad_neg_src0.
  %neg_a = fsub float 0.0, %a
  %tmp0 = fmul float %neg_a, %b
  %tmp1 = fadd float %tmp0, %c

  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Same IR body as unsafe_mad_sub0_src0, but compiled with attribute group #0
; ("unsafe-fp-math"="false"). The checks expect 0.0 - a to stay a real
; v_sub_f32_e32 from 0, and its result to feed a v_mac_f32_e32 as the last
; source operand. (0.0 - a and -a differ for a = +0.0, which is presumably
; why the fold is not expected here.)
; GCN-LABEL: {{^}}safe_mad_sub0_src0:
; GCN: v_sub_f32_e32 [[SUB0:v[0-9]+]], 0,
; GCN: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[SUB0]]
define void @safe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2

  %a = load float, float addrspace(1)* %in
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

  ; Subtraction from +0.0, not from -0.0.
  %neg_a = fsub float 0.0, %a
  %tmp0 = fmul float %neg_a, %b
  %tmp1 = fadd float %tmp0, %c

  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Multiply-add with the second multiplicand negated (a * (-b) + c).
; The checks expect no v_mac_f32 and a v_mad_f32 carrying one negated
; multiplicand; the expected output prints the negated register in the first
; source position. The last operand is matched as a plain v register with any
; number of digits.
; GCN-LABEL: {{^}}mad_neg_src1:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @mad_neg_src1(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2

  %a = load float, float addrspace(1)* %in
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

  ; Subtracting from -0.0 is the negation idiom used throughout this file.
  %neg_b = fsub float -0.0, %b
  %tmp0 = fmul float %a, %neg_b
  %tmp1 = fadd float %tmp0, %c

  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Multiply-add where the second multiplicand is 0.0 - b, compiled with
; attribute group #1 ("unsafe-fp-math"="true"). The checks expect the
; subtraction to be folded into a negated source operand of a v_mad_f32, with
; no v_mac_f32 emitted. The last operand is matched as a plain v register
; with any number of digits.
; GCN-LABEL: {{^}}unsafe_mad_sub0_src1:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @unsafe_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2

  %a = load float, float addrspace(1)* %in
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

  ; Note the +0.0 constant here, unlike the -0.0 used by mad_neg_src1.
  %neg_b = fsub float 0.0, %b
  %tmp0 = fmul float %a, %neg_b
  %tmp1 = fadd float %tmp0, %c

  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Multiply-add with the addend negated (a * b + (-c)).
; The checks expect no v_mac instruction of any type and a v_mad_f32 whose
; last source operand is a negated v register. That operand is matched with
; any number of digits after the v.
; GCN-LABEL: {{^}}mad_neg_src2:
; GCN-NOT: v_mac
; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
define void @mad_neg_src2(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2

  %a = load float, float addrspace(1)* %in
  %b = load float, float addrspace(1)* %b_ptr
  %c = load float, float addrspace(1)* %c_ptr

  ; Subtracting from -0.0 is the negation idiom used throughout this file.
  %neg_c = fsub float -0.0, %c
  %tmp0 = fmul float %a, %b
  %tmp1 = fadd float %tmp0, %neg_c

  store float %tmp1, float addrspace(1)* %out
  ret void
}

; Without special casing the inline constant check for v_mac_f32's
; src2, this fails to fold the 1.0 into a mad.

; f32 case of folding a constant addend. Each work item indexes %a, %b and
; %out by its workitem id, loads one float from %a and one from %b, and
; evaluates a chain of fadd/fmul/fsub that contains 1.0 - (tmp + tmp) * 4.0.
; The checks expect a v_add_f32_e32 of the first loaded value with itself,
; then a v_mad_f32 that multiplies that sum by -4.0 and adds 1.0, with both
; constants appearing directly as operands.
; GCN-LABEL: {{^}}fold_inline_imm_into_mac_src2_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN: v_add_f32_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
; GCN: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
define void @fold_inline_imm_into_mac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) #3 {
bb:
  ; Per-work-item element addresses.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext
  %gep.out = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %tmp = load volatile float, float addrspace(1)* %gep.a
  %tmp1 = load volatile float, float addrspace(1)* %gep.b
  ; tmp2..tmp4 form the 1.0 - (tmp + tmp) * 4.0 expression matched above.
  %tmp2 = fadd float %tmp, %tmp
  %tmp3 = fmul float %tmp2, 4.0
  %tmp4 = fsub float 1.0, %tmp3
  %tmp5 = fadd float %tmp4, %tmp1
  ; Second expression, (1.0 - (tmp1 + tmp1) * tmp) * 8.0, added to the first.
  %tmp6 = fadd float %tmp1, %tmp1
  %tmp7 = fmul float %tmp6, %tmp
  %tmp8 = fsub float 1.0, %tmp7
  %tmp9 = fmul float %tmp8, 8.0
  %tmp10 = fadd float %tmp5, %tmp9
  store float %tmp10, float addrspace(1)* %gep.out
  ret void
}

; Half-precision version of fold_inline_imm_into_mac_src2_f32, using the same
; expression chain on half values loaded with 16-bit loads.
; Expectations differ per run line. The SI run expects a v_cvt_f32_f16_e32
; of the literal 0x3c00 (the half bit pattern of 1.0), which the existing
; FIXME flags as a missed fold. The VI-FLUSH run expects a v_add_f16_e32 of
; the first loaded value with itself, then a v_mad_f16 with -4.0 and 1.0 as
; direct operands. No VI-DENORM specific lines are present for this function.
; GCN-LABEL: {{^}}fold_inline_imm_into_mac_src2_f16:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ushort [[B:v[0-9]+]]

; FIXME: How is this not folded?
; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, 0x3c00

; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
define void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 {
bb:
  ; Per-work-item element addresses.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.out = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %tmp = load volatile half, half addrspace(1)* %gep.a
  %tmp1 = load volatile half, half addrspace(1)* %gep.b
  ; tmp2..tmp4 form the 1.0 - (tmp + tmp) * 4.0 expression matched above.
  %tmp2 = fadd half %tmp, %tmp
  %tmp3 = fmul half %tmp2, 4.0
  %tmp4 = fsub half 1.0, %tmp3
  %tmp5 = fadd half %tmp4, %tmp1
  ; Second expression, (1.0 - (tmp1 + tmp1) * tmp) * 8.0, added to the first.
  %tmp6 = fadd half %tmp1, %tmp1
  %tmp7 = fmul half %tmp6, %tmp
  %tmp8 = fsub half 1.0, %tmp7
  %tmp9 = fmul half %tmp8, 8.0
  %tmp10 = fadd half %tmp5, %tmp9
  store half %tmp10, half addrspace(1)* %gep.out
  ret void
}

; Intrinsic used by the fold_inline_imm_* tests to obtain the work item id.
declare i32 @llvm.amdgcn.workitem.id.x() #2

; #0 - kernels that must keep strict floating-point semantics (most tests,
;      including safe_mad_sub0_src0).
; #1 - the unsafe_mad_sub0_* kernels, where 0.0 - x is expected to fold into
;      a negated operand.
; #2 - the workitem id intrinsic declaration.
; #3 - the fold_inline_imm_* kernels, which set no unsafe-fp-math attribute.
attributes #0 = { nounwind "unsafe-fp-math"="false" }
attributes #1 = { nounwind "unsafe-fp-math"="true" }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=GCN %s`
Enable FeatureFlatForGlobal on Volcanic Islands This switches to the workaround that HSA defaults to for the mesa path. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> llvm-svn: 292982 2017-01-25 06:02:15 +08:00			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s`
			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00
			`; GCN-LABEL: {{^}}mac_vvv:`
AMDGPU/SI: Assembler: Unify parsing/printing of operands. Summary: The goal is for each operand type to have its own parse function and at the same time share common code for tracking state as different instruction types share operand types (e.g. glc/glc_flat, etc). Introduce parseAMDGPUOperand which can parse any optional operand. DPP and Clamp/OMod have custom handling for now. Sam also suggested to have class hierarchy for operand types instead of table. This can be done in separate change. Remove parseVOP3OptionalOps, parseDS*OptionalOps, parseFlatOptionalOps, parseMubufOptionalOps, parseDPPOptionalOps. Reduce number of definitions of AsmOperand's and MatchClasses' by using common base class. Rename AsmMatcher/InstPrinter methods accordingly. Print immediate type when printing parsed immediate operand. Use 'off' if offset/index register is unused instead of skipping it to make it more readable (also agreed with SP3). Update tests. Reviewers: tstellarAMD, SamWot, artem.tamazov Subscribers: qcolombet, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D19584 llvm-svn: 268015 2016-04-29 17:02:30 +08:00			`; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}`
			`; GCN: buffer_load_dword [[B:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4`
			`; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]]`
			`; GCN: buffer_store_dword [[C]]`
AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`
			`%c_ptr = getelementptr float, float addrspace(1)* %in, i32 2`

AMDGPU: Add volatile to test loads and stores When the memory vectorizer is enabled, these tests break. These tests don't really care about the memory instructions, and it's easier to write check lines with the unmerged loads. llvm-svn: 266071 2016-04-12 21:38:18 +08:00			`%a = load volatile float, float addrspace(1)* %in`
			`%b = load volatile float, float addrspace(1)* %b_ptr`
			`%c = load volatile float, float addrspace(1)* %c_ptr`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00
			`%tmp0 = fmul float %a, %b`
			`%tmp1 = fadd float %tmp0, %c`
			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}mad_inline_sgpr_inline:`
			`; GCN-NOT: v_mac_f32`
AMDGPU: Run SIFoldOperands after PeepholeOptimizer PeepholeOptimizer cleans up redundant copies, which makes the operand folding more effective. shader-db stats: Totals: SGPRS: 34200 -> 34336 (0.40 %) VGPRS: 22118 -> 21655 (-2.09 %) Code Size: 632144 -> 633460 (0.21 %) bytes LDS: 11 -> 11 (0.00 %) blocks Scratch: 10240 -> 11264 (10.00 %) bytes per wave Max Waves: 8822 -> 8918 (1.09 %) Wait states: 0 -> 0 (0.00 %) Totals from affected shaders: SGPRS: 7704 -> 7840 (1.77 %) VGPRS: 5169 -> 4706 (-8.96 %) Code Size: 234444 -> 235760 (0.56 %) bytes LDS: 2 -> 2 (0.00 %) blocks Scratch: 0 -> 1024 (0.00 %) bytes per wave Max Waves: 1188 -> 1284 (8.08 %) Wait states: 0 -> 0 (0.00 %) Increases: SGPRS: 35 (0.01 %) VGPRS: 1 (0.00 %) Code Size: 59 (0.02 %) LDS: 0 (0.00 %) Scratch: 1 (0.00 %) Max Waves: 48 (0.02 %) Wait states: 0 (0.00 %) Decreases: SGPRS: 26 (0.01 %) VGPRS: 54 (0.02 %) Code Size: 68 (0.03 %) LDS: 0 (0.00 %) Scratch: 0 (0.00 %) Max Waves: 4 (0.00 %) Wait states: 0 (0.00 %) llvm-svn: 266378 2016-04-15 05:58:24 +08:00			`; GCN: v_mad_f32 v{{[0-9]}}, s{{[0-9]+}}, 0.5, 0.5`
AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`define void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) #0 {`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`entry:`
			`%tmp0 = fmul float 0.5, %in`
			`%tmp1 = fadd float %tmp0, 0.5`
			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}mad_vvs:`
			`; GCN-NOT: v_mac_f32`
			`; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}`
AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`define void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) #0 {`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`

			`%a = load float, float addrspace(1)* %in`
			`%b = load float, float addrspace(1)* %b_ptr`

			`%tmp0 = fmul float %a, %b`
			`%tmp1 = fadd float %tmp0, %c`
			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}mac_ssv:`
			`; GCN: v_mac_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}`
AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`define void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) #0 {`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`entry:`
			`%c = load float, float addrspace(1)* %in`

			`%tmp0 = fmul float %a, %a`
			`%tmp1 = fadd float %tmp0, %c`
			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}mac_mad_same_add:`
			`; GCN: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]`
			`; GCN: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}`
AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`define void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`
			`%c_ptr = getelementptr float, float addrspace(1)* %in, i32 2`
			`%d_ptr = getelementptr float, float addrspace(1)* %in, i32 3`
			`%e_ptr = getelementptr float, float addrspace(1)* %in, i32 4`

AMDGPU: Add volatile to test loads and stores When the memory vectorizer is enabled, these tests break. These tests don't really care about the memory instructions, and it's easier to write check lines with the unmerged loads. llvm-svn: 266071 2016-04-12 21:38:18 +08:00			`%a = load volatile float, float addrspace(1)* %in`
			`%b = load volatile float, float addrspace(1)* %b_ptr`
			`%c = load volatile float, float addrspace(1)* %c_ptr`
			`%d = load volatile float, float addrspace(1)* %d_ptr`
			`%e = load volatile float, float addrspace(1)* %e_ptr`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00
			`%tmp0 = fmul float %a, %b`
			`%tmp1 = fadd float %tmp0, %c`

			`%tmp2 = fmul float %d, %e`
			`%tmp3 = fadd float %tmp2, %c`

			`%out1 = getelementptr float, float addrspace(1)* %out, i32 1`
			`store float %tmp1, float addrspace(1)* %out`
			`store float %tmp3, float addrspace(1)* %out1`
			`ret void`
			`}`

			`; There is no advantage to using v_mac when one of the operands is negated`
			`; and v_mad accepts more operand types.`

			`; GCN-LABEL: {{^}}mad_neg_src0:`
			`; GCN-NOT: v_mac_f32`
			`; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}`
			`define void @mad_neg_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`
			`%c_ptr = getelementptr float, float addrspace(1)* %in, i32 2`

			`%a = load float, float addrspace(1)* %in`
			`%b = load float, float addrspace(1)* %b_ptr`
			`%c = load float, float addrspace(1)* %c_ptr`

AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`%neg_a = fsub float -0.0, %a`
			`%tmp0 = fmul float %neg_a, %b`
			`%tmp1 = fadd float %tmp0, %c`

			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}unsafe_mad_sub0_src0:`
			`; GCN-NOT: v_mac_f32`
			`; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}`
			`define void @unsafe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 {`
			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`
			`%c_ptr = getelementptr float, float addrspace(1)* %in, i32 2`

			`%a = load float, float addrspace(1)* %in`
			`%b = load float, float addrspace(1)* %b_ptr`
			`%c = load float, float addrspace(1)* %c_ptr`

			`%neg_a = fsub float 0.0, %a`
			`%tmp0 = fmul float %neg_a, %b`
			`%tmp1 = fadd float %tmp0, %c`

			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}safe_mad_sub0_src0:`
			`; GCN: v_sub_f32_e32 [[SUB0:v[0-9]+]], 0,`
			`; GCN: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[SUB0]]`
			`define void @safe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`
			`%c_ptr = getelementptr float, float addrspace(1)* %in, i32 2`

			`%a = load float, float addrspace(1)* %in`
			`%b = load float, float addrspace(1)* %b_ptr`
			`%c = load float, float addrspace(1)* %c_ptr`

AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`%neg_a = fsub float 0.0, %a`
			`%tmp0 = fmul float %neg_a, %b`
			`%tmp1 = fadd float %tmp0, %c`

			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}mad_neg_src1:`
			`; GCN-NOT: v_mac_f32`
			`; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}`
			`define void @mad_neg_src1(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`
			`%c_ptr = getelementptr float, float addrspace(1)* %in, i32 2`

			`%a = load float, float addrspace(1)* %in`
			`%b = load float, float addrspace(1)* %b_ptr`
			`%c = load float, float addrspace(1)* %c_ptr`

AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`%neg_b = fsub float -0.0, %b`
			`%tmp0 = fmul float %a, %neg_b`
			`%tmp1 = fadd float %tmp0, %c`

			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}unsafe_mad_sub0_src1:`
			`; GCN-NOT: v_mac_f32`
			`; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}`
			`define void @unsafe_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 {`
			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`
			`%c_ptr = getelementptr float, float addrspace(1)* %in, i32 2`

			`%a = load float, float addrspace(1)* %in`
			`%b = load float, float addrspace(1)* %b_ptr`
			`%c = load float, float addrspace(1)* %c_ptr`

AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`%neg_b = fsub float 0.0, %b`
			`%tmp0 = fmul float %a, %neg_b`
			`%tmp1 = fadd float %tmp0, %c`

			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}mad_neg_src2:`
			`; GCN-NOT: v_mac`
			`; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}`
			`define void @mad_neg_src2(float addrspace(1)* %out, float addrspace(1)* %in) #0 {`
			`entry:`
			`%b_ptr = getelementptr float, float addrspace(1)* %in, i32 1`
			`%c_ptr = getelementptr float, float addrspace(1)* %in, i32 2`

			`%a = load float, float addrspace(1)* %in`
			`%b = load float, float addrspace(1)* %b_ptr`
			`%c = load float, float addrspace(1)* %c_ptr`

AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`%neg_c = fsub float -0.0, %c`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`%tmp0 = fmul float %a, %b`
			`%tmp1 = fadd float %tmp0, %neg_c`

			`store float %tmp1, float addrspace(1)* %out`
			`ret void`
			`}`

AMDGPU: Fix folding immediates into mac src2 Whether it is legal or not needs to check for the instruction it will be replaced with. llvm-svn: 291711 2017-01-12 06:00:02 +08:00			`; Without special casing the inline constant check for v_mac_f32's`
			`; src2, this fails to fold the 1.0 into a mad.`

			`; GCN-LABEL: {{^}}fold_inline_imm_into_mac_src2_f32:`
			`; GCN: {{buffer\|flat}}_load_dword [[A:v[0-9]+]]`
			`; GCN: {{buffer\|flat}}_load_dword [[B:v[0-9]+]]`

			`; GCN: v_add_f32_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]`
			`; GCN: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0`
			`define void @fold_inline_imm_into_mac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) #3 {`
			`bb:`
			`%tid = call i32 @llvm.amdgcn.workitem.id.x()`
			`%tid.ext = sext i32 %tid to i64`
			`%gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext`
			`%gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext`
			`%gep.out = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext`
			`%tmp = load volatile float, float addrspace(1)* %gep.a`
			`%tmp1 = load volatile float, float addrspace(1)* %gep.b`
			`%tmp2 = fadd float %tmp, %tmp`
			`%tmp3 = fmul float %tmp2, 4.0`
			`%tmp4 = fsub float 1.0, %tmp3`
			`%tmp5 = fadd float %tmp4, %tmp1`
			`%tmp6 = fadd float %tmp1, %tmp1`
			`%tmp7 = fmul float %tmp6, %tmp`
			`%tmp8 = fsub float 1.0, %tmp7`
			`%tmp9 = fmul float %tmp8, 8.0`
			`%tmp10 = fadd float %tmp5, %tmp9`
			`store float %tmp10, float addrspace(1)* %gep.out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}fold_inline_imm_into_mac_src2_f16:`
			`; GCN: {{buffer\|flat}}_load_ushort [[A:v[0-9]+]]`
			`; GCN: {{buffer\|flat}}_load_ushort [[B:v[0-9]+]]`

			`; FIXME: How is this not folded?`
			`; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, 0x3c00`

AMDGPU: Combine fp16/fp64 subtarget features The same control register controls both, and are set to the same defaults. Keep the old names around as aliases. llvm-svn: 292837 2017-01-24 06:31:03 +08:00			`; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]`
			`; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0`
AMDGPU: Fix folding immediates into mac src2 Whether it is legal or not needs to check for the instruction it will be replaced with. llvm-svn: 291711 2017-01-12 06:00:02 +08:00			`define void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 {`
			`bb:`
			`%tid = call i32 @llvm.amdgcn.workitem.id.x()`
			`%tid.ext = sext i32 %tid to i64`
			`%gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext`
			`%gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext`
			`%gep.out = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext`
			`%tmp = load volatile half, half addrspace(1)* %gep.a`
			`%tmp1 = load volatile half, half addrspace(1)* %gep.b`
			`%tmp2 = fadd half %tmp, %tmp`
			`%tmp3 = fmul half %tmp2, 4.0`
			`%tmp4 = fsub half 1.0, %tmp3`
			`%tmp5 = fadd half %tmp4, %tmp1`
			`%tmp6 = fadd half %tmp1, %tmp1`
			`%tmp7 = fmul half %tmp6, %tmp`
			`%tmp8 = fsub half 1.0, %tmp7`
			`%tmp9 = fmul half %tmp8, 8.0`
			`%tmp10 = fadd half %tmp5, %tmp9`
			`store half %tmp10, half addrspace(1)* %gep.out`
			`ret void`
			`}`

			`declare i32 @llvm.amdgcn.workitem.id.x() #2`

AMDGPU: Remove superfluous string attributes from tests Also fix v_mac.ll not testing right thing for fneg llvm-svn: 275129 2016-07-12 07:35:48 +08:00			`attributes #0 = { nounwind "unsafe-fp-math"="false" }`
			`attributes #1 = { nounwind "unsafe-fp-math"="true" }`
AMDGPU: Fix folding immediates into mac src2 Whether it is legal or not needs to check for the instruction it will be replaced with. llvm-svn: 291711 2017-01-12 06:00:02 +08:00			`attributes #2 = { nounwind readnone }`
			`attributes #3 = { nounwind }`