llvm-project/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

; FIXME: Check something here. Currently it seems fabs + fneg aren't
; into 2 modifiers, although theoretically that should work.

; GCN-LABEL: {{^}}fneg_fabs_fadd_f64:
; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define amdgpu_kernel void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
  %fabs = call double @llvm.fabs.f64(double %x)
  %fsub = fsub double -0.000000e+00, %fabs
  %fadd = fadd double %y, %fsub
  store double %fadd, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
  %x = load double, double addrspace(1)* %xptr, align 8
  %y = load double, double addrspace(1)* %xptr, align 8
  %fabs = call double @llvm.fabs.f64(double %x)
  %fsub = fsub double -0.000000e+00, %fabs
  %fadd = fadd double %y, %fsub
  store double %fadd, double addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}fneg_fabs_fmul_f64:
; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define amdgpu_kernel void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
  %fabs = call double @llvm.fabs.f64(double %x)
  %fsub = fsub double -0.000000e+00, %fabs
  %fmul = fmul double %y, %fsub
  store double %fmul, double addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}fneg_fabs_free_f64:
define amdgpu_kernel void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
  %bc = bitcast i64 %in to double
  %fabs = call double @llvm.fabs.f64(double %bc)
  %fsub = fsub double -0.000000e+00, %fabs
  store double %fsub, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fneg_fabs_fn_free_f64:
; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
  %bc = bitcast i64 %in to double
  %fabs = call double @fabs(double %bc)
  %fsub = fsub double -0.000000e+00, %fabs
  store double %fsub, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fneg_fabs_f64:
; GCN-DAG: s_load_dwordx2
; GCN-DAG: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
; SI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xb
; VI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
  %fabs = call double @llvm.fabs.f64(double %in)
  %fsub = fsub double -0.000000e+00, %fabs
  store double %fsub, double addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}fneg_fabs_v2f64:
; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
; GCN-NOT: 0x80000000
; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define amdgpu_kernel void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
  %fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
  store <2 x double> %fsub, <2 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}fneg_fabs_v4f64:
; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
; GCN-NOT: 0x80000000
; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
define amdgpu_kernel void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
  %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
  %fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
  store <4 x double> %fsub, <4 x double> addrspace(1)* %out
  ret void
}

declare double @fabs(double) readnone
declare double @llvm.fabs.f64(double) readnone
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
AMDGPU: Support commuting with immediate in src0 llvm-svn: 280970 2016-09-09 01:19:29 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=GCN %s`
Enable FeatureFlatForGlobal on Volcanic Islands This switches to the workaround that HSA defaults to for the mesa path. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> llvm-svn: 292982 2017-01-25 06:02:15 +08:00			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=VI -check-prefix=GCN %s`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00
			`; FIXME: Check something here. Currently it seems fabs + fneg aren't`
			`; into 2 modifiers, although theoretically that should work.`

AMDGPU/SI: Use the hazard recognizer to break SMEM soft clauses Summary: Add support for detecting hazards in SMEM soft clauses, so that we only break the clauses when necessary, either by adding s_nop or re-ordering other alu instructions. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18870 llvm-svn: 268260 2016-05-03 01:39:06 +08:00			`; GCN-LABEL: {{^}}fneg_fabs_fadd_f64:`
[AMDGPU] Allow SDWA in instructions with immediates and SGPRs An encoding does not allow to use SDWA in an instruction with scalar operands, either literals or SGPRs. That is however possible to copy these operands into a VGPR first. Several copies of the value are produced if multiple SDWA conversions were done. To cleanup MachineLICM (to hoist copies out of loops), MachineCSE (to remove duplicate copies) and SIFoldOperands (to replace SGPR to VGPR copy with immediate copy right to the VGPR) runs are added after the SDWA pass. Differential Revision: https://reviews.llvm.org/D33583 llvm-svn: 304219 2017-05-31 00:49:24 +08:00			`; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -\|v{{\[[0-9]+:[0-9]+\]}}\|`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00			`%fabs = call double @llvm.fabs.f64(double %x)`
			`%fsub = fsub double -0.000000e+00, %fabs`
			`%fadd = fadd double %y, %fsub`
			`store double %fadd, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%x = load double, double addrspace(1)* %xptr, align 8`
			`%y = load double, double addrspace(1)* %xptr, align 8`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00			`%fabs = call double @llvm.fabs.f64(double %x)`
			`%fsub = fsub double -0.000000e+00, %fabs`
			`%fadd = fadd double %y, %fsub`
			`store double %fadd, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

AMDGPU/SI: Use the hazard recognizer to break SMEM soft clauses Summary: Add support for detecting hazards in SMEM soft clauses, so that we only break the clauses when necessary, either by adding s_nop or re-ordering other alu instructions. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18870 llvm-svn: 268260 2016-05-03 01:39:06 +08:00			`; GCN-LABEL: {{^}}fneg_fabs_fmul_f64:`
[AMDGPU] Allow SDWA in instructions with immediates and SGPRs An encoding does not allow to use SDWA in an instruction with scalar operands, either literals or SGPRs. That is however possible to copy these operands into a VGPR first. Several copies of the value are produced if multiple SDWA conversions were done. To cleanup MachineLICM (to hoist copies out of loops), MachineCSE (to remove duplicate copies) and SIFoldOperands (to replace SGPR to VGPR copy with immediate copy right to the VGPR) runs are added after the SDWA pass. Differential Revision: https://reviews.llvm.org/D33583 llvm-svn: 304219 2017-05-31 00:49:24 +08:00			`; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -\|v{{\[[0-9]+:[0-9]+\]}}\|`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00			`%fabs = call double @llvm.fabs.f64(double %x)`
			`%fsub = fsub double -0.000000e+00, %fabs`
			`%fmul = fmul double %y, %fsub`
			`store double %fmul, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

AMDGPU/SI: Use the hazard recognizer to break SMEM soft clauses Summary: Add support for detecting hazards in SMEM soft clauses, so that we only break the clauses when necessary, either by adding s_nop or re-ordering other alu instructions. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18870 llvm-svn: 268260 2016-05-03 01:39:06 +08:00			`; GCN-LABEL: {{^}}fneg_fabs_free_f64:`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00			`%bc = bitcast i64 %in to double`
			`%fabs = call double @llvm.fabs.f64(double %bc)`
			`%fsub = fsub double -0.000000e+00, %fabs`
			`store double %fsub, double addrspace(1)* %out`
			`ret void`
			`}`

AMDGPU/SI: Use the hazard recognizer to break SMEM soft clauses Summary: Add support for detecting hazards in SMEM soft clauses, so that we only break the clauses when necessary, either by adding s_nop or re-ordering other alu instructions. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18870 llvm-svn: 268260 2016-05-03 01:39:06 +08:00			`; GCN-LABEL: {{^}}fneg_fabs_fn_free_f64:`
			`; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}`
			`; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00			`%bc = bitcast i64 %in to double`
			`%fabs = call double @fabs(double %bc)`
			`%fsub = fsub double -0.000000e+00, %fabs`
			`store double %fsub, double addrspace(1)* %out`
			`ret void`
			`}`

AMDGPU/SI: Use the hazard recognizer to break SMEM soft clauses Summary: Add support for detecting hazards in SMEM soft clauses, so that we only break the clauses when necessary, either by adding s_nop or re-ordering other alu instructions. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18870 llvm-svn: 268260 2016-05-03 01:39:06 +08:00			`; GCN-LABEL: {{^}}fneg_fabs_f64:`
MachineScheduler: Fully compare top/bottom candidates In bidirectional scheduling this gives more stable results than just comparing the "reason" fields of the top/bottom node because the reason field may be higher depending on what other nodes are in the queue. Differential Revision: http://reviews.llvm.org/D19401 llvm-svn: 273755 2016-06-25 08:23:00 +08:00			`; GCN-DAG: s_load_dwordx2`
AMDGPU/SI: Use the hazard recognizer to break SMEM soft clauses Summary: Add support for detecting hazards in SMEM soft clauses, so that we only break the clauses when necessary, either by adding s_nop or re-ordering other alu instructions. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18870 llvm-svn: 268260 2016-05-03 01:39:06 +08:00			`; GCN-DAG: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}`
			`; SI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xb`
			`; VI-DAG: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x2c`
			`; GCN-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]`
			`; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]`
			`; GCN: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00			`%fabs = call double @llvm.fabs.f64(double %in)`
			`%fsub = fsub double -0.000000e+00, %fabs`
			`store double %fsub, double addrspace(1)* %out, align 8`
			`ret void`
			`}`

AMDGPU/SI: Use the hazard recognizer to break SMEM soft clauses Summary: Add support for detecting hazards in SMEM soft clauses, so that we only break the clauses when necessary, either by adding s_nop or re-ordering other alu instructions. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18870 llvm-svn: 268260 2016-05-03 01:39:06 +08:00			`; GCN-LABEL: {{^}}fneg_fabs_v2f64:`
			`; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}`
			`; GCN-NOT: 0x80000000`
			`; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]`
			`; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00			`%fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)`
			`%fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs`
			`store <2 x double> %fsub, <2 x double> addrspace(1)* %out`
			`ret void`
			`}`

AMDGPU/SI: Use the hazard recognizer to break SMEM soft clauses Summary: Add support for detecting hazards in SMEM soft clauses, so that we only break the clauses when necessary, either by adding s_nop or re-ordering other alu instructions. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18870 llvm-svn: 268260 2016-05-03 01:39:06 +08:00			`; GCN-LABEL: {{^}}fneg_fabs_v4f64:`
			`; GCN: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}`
			`; GCN-NOT: 0x80000000`
			`; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]`
			`; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]`
			`; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]`
			`; GCN: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {`
R600/SI: Use source modifiers for f64 fneg llvm-svn: 215748 2014-08-16 02:42:18 +08:00			`%fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)`
			`%fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs`
			`store <4 x double> %fsub, <4 x double> addrspace(1)* %out`
			`ret void`
			`}`

			`declare double @fabs(double) readnone`
			`declare double @llvm.fabs.f64(double) readnone`
			`declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone`
			`declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone`