llvm-project/llvm/test/CodeGen/AMDGPU/fneg.ll

; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=R600 -check-prefix=FUNC %s

; Scalar f32 negation of a kernel argument, written as (-0.0 - x).
; The amdgcn checks expect the loaded scalar to be xor'ed with 0x80000000
; and the result then moved into a VGPR.
; FUNC-LABEL: {{^}}s_fneg_f32:
; R600: -PV

; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_xor_b32 [[NEG_VAL:s[0-9]+]], [[VAL]], 0x80000000
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[NEG_VAL]]
define amdgpu_kernel void @s_fneg_f32(float addrspace(1)* %out, float %in) {
  %neg = fsub float -0.0, %in
  store float %neg, float addrspace(1)* %out
  ret void
}

; <2 x float> negation of a kernel argument, written as (<-0.0, -0.0> - x).
; The amdgcn checks expect an s_brev_b32 of 1 (presumably materializing the
; sign mask) followed by two scalar xors, one per element.
; NOTE(review): SIGNBIT is captured below but not referenced by the xor
; checks; confirm whether the xors should be tied to it.
; FUNC-LABEL: {{^}}s_fneg_v2f32:
; R600: -PV
; R600: -PV

; GCN: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1
; GCN: s_xor_b32
; GCN: s_xor_b32
define amdgpu_kernel void @s_fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
  %neg = fsub <2 x float> <float -0.0, float -0.0>, %in
  store <2 x float> %neg, <2 x float> addrspace(1)* %out
  ret void
}

; <4 x float> negation of a kernel argument, written as a subtraction from a
; vector of -0.0. The amdgcn checks expect four scalar xors, one per element.
; FUNC-LABEL: {{^}}s_fneg_v4f32:
; R600: -PV
; R600: -T
; R600: -PV
; R600: -PV

; GCN: s_xor_b32
; GCN: s_xor_b32
; GCN: s_xor_b32
; GCN: s_xor_b32
define amdgpu_kernel void @s_fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %in
  store <4 x float> %neg, <4 x float> addrspace(1)* %out
  ret void
}

; DAGCombiner will transform:
; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
; unless the target returns true for isNegFree()

; This kernel subtracts from +0.0 (not -0.0). The checks expect a real
; v_sub_f32 with a literal 0 on amdgcn, and no XOR on R600.
; FUNC-LABEL: {{^}}fsub0_f32:

; GCN: v_sub_f32_e64 v{{[0-9]+}}, 0, s{{[0-9]+$}}

; R600-NOT: XOR
; R600: -KC0[2].Z
define amdgpu_kernel void @fsub0_f32(float addrspace(1)* %out, i32 %in) {
  ; Reinterpret the integer argument as a float so the bitcast feeds the fsub.
  %bc = bitcast i32 %in to float
  %fsub = fsub float 0.0, %bc
  store float %fsub, float addrspace(1)* %out
  ret void
}
; Negation (-0.0 - x) of an i32 kernel argument that is bitcast to float.
; The amdgcn checks expect the loaded scalar to be xor'ed with 0x80000000,
; copied to a VGPR and stored; the R600 checks expect no XOR and a -PV.W
; operand. The tahiti and tonga load checks differ only in the offset operand
; (0xb versus 0x2c).
; FUNC-LABEL: {{^}}fneg_free_f32:
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c

; GCN: s_xor_b32 [[RES:s[0-9]+]], [[NEG_VALUE]], 0x80000000
; GCN: v_mov_b32_e32 [[V_RES:v[0-9]+]], [[RES]]
; GCN: buffer_store_dword [[V_RES]]

; R600-NOT: XOR
; R600: -PV.W
define amdgpu_kernel void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
  %as.float = bitcast i32 %in to float
  %neg = fsub float -0.0, %as.float
  store float %neg, float addrspace(1)* %out
  ret void
}

; The negated argument is multiplied by the original argument. The amdgcn
; checks expect no xor at all: the negation should show up as a leading '-'
; on the first source operand of the v_mul_f32.
; FUNC-LABEL: {{^}}fneg_fold_f32:
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: xor
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define amdgpu_kernel void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
  %neg = fsub float -0.0, %in
  %prod = fmul float %neg, %in
  store float %prod, float addrspace(1)* %out
  ret void
}

; Make sure we turn some integer operations back into fneg
; The sign bit is flipped with an integer xor between two bitcasts; the check
; expects no separate negate, only a multiply by -4.0 instead of 4.0.
; FUNC-LABEL: {{^}}bitpreserve_fneg_f32:
; GCN: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -4.0
define amdgpu_kernel void @bitpreserve_fneg_f32(float addrspace(1)* %out, float %in) {
  %in.bc = bitcast float %in to i32
  ; 2147483648 == 0x80000000, i.e. only the f32 sign bit.
  %int.neg = xor i32 %in.bc, 2147483648
  %bc = bitcast i32 %int.neg to float
  %fmul = fmul float %bc, 4.0
  store float %fmul, float addrspace(1)* %out
  ret void
}
AMDGPU/GlobalISel: Select G_FABS/G_FNEG f64 doesn't work yet because tablegen currently doesn't handlde REG_SEQUENCE. This does regress some multi use VALU fneg cases since now the immediate remains in an SGPR, and more moves are used for legalizing the xor. This is a SIFixSGPRCopies deficiency. llvm-svn: 371540 2019-09-11 01:19:46 +08:00			`; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s`
			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -enable-var-scope -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s`
			`; RUN: llc -march=r600 -mcpu=redwood < %s \| FileCheck -enable-var-scope -check-prefix=R600 -check-prefix=FUNC %s`
R600: Expand vector FNEG llvm-svn: 186913 2013-07-23 09:47:46 +08:00
AMDGPU: Fix f16 fabs/fneg llvm-svn: 286931 2016-11-15 10:25:28 +08:00			`; FUNC-LABEL: {{^}}s_fneg_f32:`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`; R600: -PV`

AMDGPU/GlobalISel: Select G_FABS/G_FNEG f64 doesn't work yet because tablegen currently doesn't handlde REG_SEQUENCE. This does regress some multi use VALU fneg cases since now the immediate remains in an SGPR, and more moves are used for legalizing the xor. This is a SIFixSGPRCopies deficiency. llvm-svn: 371540 2019-09-11 01:19:46 +08:00			`; GCN: s_load_dword [[VAL:s[0-9]+]]`
			`; GCN: s_xor_b32 [[NEG_VAL:s[0-9]+]], [[VAL]], 0x80000000`
			`; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[NEG_VAL]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @s_fneg_f32(float addrspace(1)* %out, float %in) {`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`%fneg = fsub float -0.000000e+00, %in`
			`store float %fneg, float addrspace(1)* %out`
R600/SI: Prefer SALU instructions for bit shift operations All shift operations will be selected as SALU instructions and then if necessary lowered to VALU instructions in the SIFixSGPRCopies pass. This allows us to do more operations on the SALU which will improve performance and is also required for implementing private memory using indirect addressing, since the private memory pointers must stay in the scalar registers. This patch includes some fixes from Matt Arsenault. llvm-svn: 194625 2013-11-14 07:36:37 +08:00			`ret void`
			`}`

AMDGPU: Fix f16 fabs/fneg llvm-svn: 286931 2016-11-15 10:25:28 +08:00			`; FUNC-LABEL: {{^}}s_fneg_v2f32:`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`; R600: -PV`
			`; R600: -PV`

AMDGPU/GlobalISel: Select G_FABS/G_FNEG f64 doesn't work yet because tablegen currently doesn't handlde REG_SEQUENCE. This does regress some multi use VALU fneg cases since now the immediate remains in an SGPR, and more moves are used for legalizing the xor. This is a SIFixSGPRCopies deficiency. llvm-svn: 371540 2019-09-11 01:19:46 +08:00			`; GCN: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1`
[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.' Detailed description: After https://reviews.llvm.org/D59990 submit several issues were discovered. Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly. Discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back AMDGPU specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767 2019-10-14 20:01:10 +08:00			`; GCN: s_xor_b32`
			`; GCN: s_xor_b32`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @s_fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`%fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in`
			`store <2 x float> %fneg, <2 x float> addrspace(1)* %out`
R600: Expand vector FNEG llvm-svn: 186913 2013-07-23 09:47:46 +08:00			`ret void`
			`}`

AMDGPU: Fix f16 fabs/fneg llvm-svn: 286931 2016-11-15 10:25:28 +08:00			`; FUNC-LABEL: {{^}}s_fneg_v4f32:`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`; R600: -PV`
			`; R600: -T`
			`; R600: -PV`
			`; R600: -PV`

[AMDGPU] Come back patch for the 'Assign register class for cross block values according to the divergence.' Detailed description: After https://reviews.llvm.org/D59990 submit several issues were discovered. Changes in common code were preserved but AMDGPU specific part was reverted to keep the backend working correctly. Discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back AMDGPU specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767 2019-10-14 20:01:10 +08:00			`; GCN: s_xor_b32`
			`; GCN: s_xor_b32`
			`; GCN: s_xor_b32`
			`; GCN: s_xor_b32`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @s_fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`%fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in`
			`store <4 x float> %fneg, <4 x float> addrspace(1)* %out`
R600: Expand vector FNEG llvm-svn: 186913 2013-07-23 09:47:46 +08:00			`ret void`
			`}`
DAGCombiner: Pass the correct type to TargetLowering::isF(Abs\|Neg)Free This commit also implements these functions for R600 and removes a test case that was relying on the buggy behavior. llvm-svn: 187007 2013-07-24 07:55:03 +08:00
			`; DAGCombiner will transform:`
			`; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))`
			`; unless the target returns true for isNegFree()`

AMDGPU: Fix f16 fabs/fneg llvm-svn: 286931 2016-11-15 10:25:28 +08:00			`; FUNC-LABEL: {{^}}fsub0_f32:`

			`; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}`

R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`; R600-NOT: XOR`
			`; R600: -KC0[2].Z`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fsub0_f32(float addrspace(1)* %out, i32 %in) {`
AMDGPU: Fix f16 fabs/fneg llvm-svn: 286931 2016-11-15 10:25:28 +08:00			`%bc = bitcast i32 %in to float`
			`%fsub = fsub float 0.0, %bc`
			`store float %fsub, float addrspace(1)* %out`
			`ret void`
			`}`
			`; FUNC-LABEL: {{^}}fneg_free_f32:`
			`; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb`
			`; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00
AMDGPU/GlobalISel: Select G_FABS/G_FNEG f64 doesn't work yet because tablegen currently doesn't handlde REG_SEQUENCE. This does regress some multi use VALU fneg cases since now the immediate remains in an SGPR, and more moves are used for legalizing the xor. This is a SIFixSGPRCopies deficiency. llvm-svn: 371540 2019-09-11 01:19:46 +08:00			`; GCN: s_xor_b32 [[RES:s[0-9]+]], [[NEG_VALUE]], 0x80000000`
			`; GCN: v_mov_b32_e32 [[V_RES:v[0-9]+]], [[RES]]`
			`; GCN: buffer_store_dword [[V_RES]]`
AMDGPU: Fix f16 fabs/fneg llvm-svn: 286931 2016-11-15 10:25:28 +08:00
			`; R600-NOT: XOR`
			`; R600: -PV.W`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`%bc = bitcast i32 %in to float`
AMDGPU: Fix f16 fabs/fneg llvm-svn: 286931 2016-11-15 10:25:28 +08:00			`%fsub = fsub float -0.0, %bc`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`store float %fsub, float addrspace(1)* %out`
DAGCombiner: Pass the correct type to TargetLowering::isF(Abs\|Neg)Free This commit also implements these functions for R600 and removes a test case that was relying on the buggy behavior. llvm-svn: 187007 2013-07-24 07:55:03 +08:00			`ret void`
			`}`
R600/SI: Fold fabs/fneg into src input modifier llvm-svn: 208480 2014-05-11 03:18:39 +08:00
R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; FUNC-LABEL: {{^}}fneg_fold_f32:`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb`
R600/SI: Enable a lot of existing tests for VI (squashed commits) This is a union of these commits: * R600/SI: Enable more tests for VI which need no changes * R600/SI: Enable V_BCNT tests for VI Differences: - v_bcnt_..._e32 -> _e64 - s_load_dword* inline offset is in bytes instead of dwords * R600/SI: Enable all tests for VI which use S_LOAD_DWORD The inline offset is changed from dwords to bytes. * R600/SI: Enable LDS tests for VI Differences: - the s_load_dword inline offset changed from dwords to bytes - the tests checked very little on CI, so they have been fixed to check all instructions that "SI" checked * R600/SI: Enable lshr tests for VI * R600/SI: Fix divrem64 tests - "v_lshl_64" was missing "b" before "64" - added VI-NOT checks * R600/SI: Enable the SI.tid test for VI * R600/SI: Enable the frem test for VI Also, the frem_f64 checking is added for CI-VI. * R600/SI: Add VI tests for rsq.clamped llvm-svn: 228830 2015-02-11 22:26:46 +08:00			`; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c`
			`; GCN-NOT: xor`
			`; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @fneg_fold_f32(float addrspace(1)* %out, float %in) {`
R600: Cleanup fneg tests llvm-svn: 214612 2014-08-02 10:26:51 +08:00			`%fsub = fsub float -0.0, %in`
			`%fmul = fmul float %fsub, %in`
			`store float %fmul, float addrspace(1)* %out`
R600/SI: Fold fabs/fneg into src input modifier llvm-svn: 208480 2014-05-11 03:18:39 +08:00			`ret void`
			`}`
AMDGPU: Implement hasBitPreservingFPLogic llvm-svn: 315754 2017-10-14 05:10:22 +08:00
			`; Make sure we turn some integer operations back into fabs`
			`; FUNC-LABEL: {{^}}bitpreserve_fneg_f32:`
			`; GCN: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -4.0`
			`define amdgpu_kernel void @bitpreserve_fneg_f32(float addrspace(1)* %out, float %in) {`
			`%in.bc = bitcast float %in to i32`
			`%int.abs = xor i32 %in.bc, 2147483648`
			`%bc = bitcast i32 %int.abs to float`
			`%fadd = fmul float %bc, 4.0`
			`store float %fadd, float addrspace(1)* %out`
			`ret void`
			`}`