; llvm-project/llvm/test/CodeGen/AMDGPU/inline-constraints.ll

; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s

; GCN-LABEL: {{^}}inline_reg_constraints:
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]

define amdgpu_kernel void @inline_reg_constraints(i32 addrspace(1)* %ptr) {
entry:
  ; Every call below takes %ptr as its only input and produces one result.
  ; None of the results is used afterwards; each call is marked sideeffect.

  ; "v" (VGPR) constraint on both the output and the pointer input, with
  ; result types i32, <2 x i32>, i64, <4 x i32> and i128.
  %vgpr_i32   = tail call i32 asm sideeffect "flat_load_dword   $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
  %vgpr_v2i32 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
  %vgpr_i64   = tail call i64 asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
  %vgpr_v4i32 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
  %vgpr_i128  = tail call i128 asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)

  ; "s" (SGPR) constraint on both the output and the pointer input, with the
  ; same result types plus <8 x i32>.
  %sgpr_i32   = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
  %sgpr_v2i32 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
  %sgpr_i64   = tail call i64 asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
  %sgpr_v4i32 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
  %sgpr_i128  = tail call i128 asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
  %sgpr_v8i32 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
  ret void
}

; FIXME: Should be able to avoid copy
; GCN-LABEL: {{^}}inline_sreg_constraint_m0:
; GCN: s_mov_b32 m0, -1
; GCN: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
; GCN: ; use [[COPY_M0]]
define amdgpu_kernel void @inline_sreg_constraint_m0() {
  ; The first asm writes -1 to m0 and returns it as an i32 through the
  ; explicit-register output constraint "={M0}".
  %m0_value = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
  ; The second asm consumes that value through a plain "s" (SGPR) input; the
  ; FileCheck lines above currently expect a copy out of m0 before the use.
  tail call void asm sideeffect "; use $0", "s"(i32 %m0_value)
  ret void
}

; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i32:
; GCN: s_mov_b32 [[REG:s[0-9]+]], 32
; GCN: ; use [[REG]]
; Passes the i32 immediate 32 as an inline-asm input with the "s" (SGPR)
; constraint. The asm text is only a comment that prints the operand, which is
; what the FileCheck lines above match against.
define amdgpu_kernel void @inline_sreg_constraint_imm_i32() {
  tail call void asm sideeffect "; use $0", "s"(i32 32)
  ret void
}

; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f32:
; GCN: s_mov_b32 [[REG:s[0-9]+]], 1.0
; GCN: ; use [[REG]]
; Passes the float immediate 1.0 as an inline-asm input with the "s" (SGPR)
; constraint. The asm text is only a comment that prints the operand, which is
; what the FileCheck lines above match against.
define amdgpu_kernel void @inline_sreg_constraint_imm_f32() {
  tail call void asm sideeffect "; use $0", "s"(float 1.0)
  ret void
}

; FIXME: Should be able to use s_mov_b64
; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64:
; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4{{$}}
; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
; Passes the i64 immediate -4 as an inline-asm input with the "s" (SGPR)
; constraint. The FileCheck lines above expect the value to be materialized
; with two 32-bit moves (-4 and -1) and printed as a register pair.
define amdgpu_kernel void @inline_sreg_constraint_imm_i64() {
  tail call void asm sideeffect "; use $0", "s"(i64 -4)
  ret void
}

; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f64:
; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}
; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0x3ff00000{{$}}
; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
; Passes the double immediate 1.0 as an inline-asm input with the "s" (SGPR)
; constraint. The FileCheck lines above expect the value to be materialized
; with two 32-bit moves (0 and 0x3ff00000) and printed as a register pair.
define amdgpu_kernel void @inline_sreg_constraint_imm_f64() {
  tail call void asm sideeffect "; use $0", "s"(double 1.0)
  ret void
}
AMDGPU/SI: Add support for sgpr and vgpr inline assembly constraints Summary: The 's' constraint represents sgprs and the 'v' constraint represents vgprs. Reviewers: arsenm, echristo Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15342 llvm-svn: 255203 2015-12-10 10:12:53 +08:00			`; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs \| FileCheck --check-prefix=GCN %s`
			`; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs \| FileCheck --check-prefix=GCN %s`

			`; GCN-LABEL: {{^}}inline_reg_constraints:`
			`; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]`
			`; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]`
[AMDGPU] Inline asm - added i16, half and i128 types support AMDGPU inline assembler support i16, half and i128 typed variables in constraints, but they were reported as error. Needed to fix https://github.com/RadeonOpenCompute/ROCm/issues/341, e.g. to be able to load with global_load_dwordx4 to a 128bit integer variable Differential Revision: https://reviews.llvm.org/D44920 llvm-svn: 334301 2018-06-09 00:29:04 +08:00			`; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]`
			`; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]`
AMDGPU/SI: Add support for sgpr and vgpr inline assembly constraints Summary: The 's' constraint represents sgprs and the 'v' constraint represents vgprs. Reviewers: arsenm, echristo Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15342 llvm-svn: 255203 2015-12-10 10:12:53 +08:00			`; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]`
			`; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]`
			`; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]`
[AMDGPU] Inline asm - added i16, half and i128 types support AMDGPU inline assembler support i16, half and i128 typed variables in constraints, but they were reported as error. Needed to fix https://github.com/RadeonOpenCompute/ROCm/issues/341, e.g. to be able to load with global_load_dwordx4 to a 128bit integer variable Differential Revision: https://reviews.llvm.org/D44920 llvm-svn: 334301 2018-06-09 00:29:04 +08:00			`; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]`
			`; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]`
AMDGPU/SI: Add support for sgpr and vgpr inline assembly constraints Summary: The 's' constraint represents sgprs and the 'v' constraint represents vgprs. Reviewers: arsenm, echristo Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15342 llvm-svn: 255203 2015-12-10 10:12:53 +08:00			`; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]`
			`; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @inline_reg_constraints(i32 addrspace(1)* %ptr) {`
AMDGPU/SI: Add support for sgpr and vgpr inline assembly constraints Summary: The 's' constraint represents sgprs and the 'v' constraint represents vgprs. Reviewers: arsenm, echristo Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15342 llvm-svn: 255203 2015-12-10 10:12:53 +08:00			`entry:`
			`%v32 = tail call i32 asm sideeffect "flat_load_dword $0, $1", "=v,v"(i32 addrspace(1)* %ptr)`
[AMDGPU] Inline asm - added i16, half and i128 types support AMDGPU inline assembler support i16, half and i128 typed variables in constraints, but they were reported as error. Needed to fix https://github.com/RadeonOpenCompute/ROCm/issues/341, e.g. to be able to load with global_load_dwordx4 to a 128bit integer variable Differential Revision: https://reviews.llvm.org/D44920 llvm-svn: 334301 2018-06-09 00:29:04 +08:00			`%v2_32 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)`
			`%v64 = tail call i64 asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)`
			`%v4_32 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)`
			`%v128 = tail call i128 asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)`
			`%s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)`
			`%s32_2 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)`
			`%s64 = tail call i64 asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)`
			`%s4_32 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)`
			`%s128 = tail call i128 asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)`
AMDGPU/SI: Add support for sgpr and vgpr inline assembly constraints Summary: The 's' constraint represents sgprs and the 'v' constraint represents vgprs. Reviewers: arsenm, echristo Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15342 llvm-svn: 255203 2015-12-10 10:12:53 +08:00			`%s256 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)`
			`ret void`
			`}`
AMDGPU: Relax SGPR asm constraint register class s should be SReg_32 to be as general as possible. This can avoid a copy from m0. llvm-svn: 280154 2016-08-31 04:50:08 +08:00
AMDGPU/SI: Add back reverted SGPR spilling code, but disable it suggested as a better solution by Matt llvm-svn: 287942 2016-11-26 01:37:09 +08:00			`; FIXME: Should be able to avoid copy`
AMDGPU: Relax SGPR asm constraint register class s should be SReg_32 to be as general as possible. This can avoid a copy from m0. llvm-svn: 280154 2016-08-31 04:50:08 +08:00			`; GCN-LABEL: {{^}}inline_sreg_constraint_m0:`
			`; GCN: s_mov_b32 m0, -1`
AMDGPU/SI: Add back reverted SGPR spilling code, but disable it suggested as a better solution by Matt llvm-svn: 287942 2016-11-26 01:37:09 +08:00			`; GCN: s_mov_b32 [[COPY_M0:s[0-9]+]], m0`
			`; GCN: ; use [[COPY_M0]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @inline_sreg_constraint_m0() {`
AMDGPU: Relax SGPR asm constraint register class s should be SReg_32 to be as general as possible. This can avoid a copy from m0. llvm-svn: 280154 2016-08-31 04:50:08 +08:00			`%m0 = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()`
			`tail call void asm sideeffect "; use $0", "s"(i32 %m0)`
			`ret void`
			`}`
AMDGPU: Default to using scalar mov to materialize immediate This is the conservatively correct way because it's easy to move or replace a scalar immediate. This was incorrect in the case when the register class wasn't known from the static instruction definition, but still needed to be an SGPR. The main example of this is inlineasm has an SGPR constraint. Also start verifying the register classes of inlineasm operands. llvm-svn: 285762 2016-11-02 06:55:07 +08:00
			`; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i32:`
			`; GCN: s_mov_b32 [[REG:s[0-9]+]], 32`
			`; GCN: ; use [[REG]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @inline_sreg_constraint_imm_i32() {`
AMDGPU: Default to using scalar mov to materialize immediate This is the conservatively correct way because it's easy to move or replace a scalar immediate. This was incorrect in the case when the register class wasn't known from the static instruction definition, but still needed to be an SGPR. The main example of this is inlineasm has an SGPR constraint. Also start verifying the register classes of inlineasm operands. llvm-svn: 285762 2016-11-02 06:55:07 +08:00			`tail call void asm sideeffect "; use $0", "s"(i32 32)`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f32:`
			`; GCN: s_mov_b32 [[REG:s[0-9]+]], 1.0`
			`; GCN: ; use [[REG]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @inline_sreg_constraint_imm_f32() {`
AMDGPU: Default to using scalar mov to materialize immediate This is the conservatively correct way because it's easy to move or replace a scalar immediate. This was incorrect in the case when the register class wasn't known from the static instruction definition, but still needed to be an SGPR. The main example of this is inlineasm has an SGPR constraint. Also start verifying the register classes of inlineasm operands. llvm-svn: 285762 2016-11-02 06:55:07 +08:00			`tail call void asm sideeffect "; use $0", "s"(float 1.0)`
			`ret void`
			`}`

			`; FIXME: Should be able to use s_mov_b64`
			`; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64:`
			`; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4{{$}}`
			`; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}`
			`; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @inline_sreg_constraint_imm_i64() {`
AMDGPU: Default to using scalar mov to materialize immediate This is the conservatively correct way because it's easy to move or replace a scalar immediate. This was incorrect in the case when the register class wasn't known from the static instruction definition, but still needed to be an SGPR. The main example of this is inlineasm has an SGPR constraint. Also start verifying the register classes of inlineasm operands. llvm-svn: 285762 2016-11-02 06:55:07 +08:00			`tail call void asm sideeffect "; use $0", "s"(i64 -4)`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f64:`
			`; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}`
			`; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0x3ff00000{{$}}`
			`; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @inline_sreg_constraint_imm_f64() {`
AMDGPU: Default to using scalar mov to materialize immediate This is the conservatively correct way because it's easy to move or replace a scalar immediate. This was incorrect in the case when the register class wasn't known from the static instruction definition, but still needed to be an SGPR. The main example of this is inlineasm has an SGPR constraint. Also start verifying the register classes of inlineasm operands. llvm-svn: 285762 2016-11-02 06:55:07 +08:00			`tail call void asm sideeffect "; use $0", "s"(double 1.0)`
			`ret void`
			`}`