llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll

; Codegen test for the llvm.amdgcn.mqsad.u32.u8 intrinsic. The file is compiled
; twice with llc, once with no -mcpu and once with -mcpu=fiji, and both outputs
; are matched against the same set of check lines.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Intrinsic under test: (i64, i32, <4 x i32>) -> <4 x i32>.
declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0

; Third intrinsic operand is the constant vector <10, 20, 30, 40>.
;
; The inline asm calls pin the i64 operand to v[2:3], the i32 operand to v4 and
; the intrinsic result to v[2:5], so the requested result range overlaps both
; register sources. The checks expect the i64 operand to be copied down to
; v[0:1] and the i32 operand to be read from v6 rather than v4, i.e. neither
; register source of v_mqsad_u32_u8 lies inside its destination range.
; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
  ; NOTE(review): "v_lsrlrev_b64" is not a mnemonic I recognise (possibly meant
  ; v_lshlrev_b64 or v_lshrrev_b64). No check line matches this asm text, so it
  ; is kept verbatim here -- confirm before changing it.
  %src.pinned = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
  %a.pinned = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
  %sad = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src.pinned, i32 %a.pinned, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0
  ; Consuming the result through a {v[2:5]} input constraint is what requests
  ; v[2:5] as the intrinsic's destination.
  %result = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %sad) #0
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; Third intrinsic operand is the kernel argument %b instead of a constant.
;
; As arranged by the inline asm constraints in the body, the i64 operand is
; produced in v[2:3], the i32 operand in v4, and the result is demanded in
; v[2:5]. The checks require the 64-bit operand to be moved to v[0:1] and the
; 32-bit operand to appear as v6, so that no register source of
; v_mqsad_u32_u8 overlaps the v[2:5] destination.
; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) {
  ; NOTE(review): the "v_lsrlrev_b64" spelling looks like a typo for an
  ; existing 64-bit shift mnemonic; the asm text is not matched by any check
  ; line, so it is reproduced unchanged -- confirm before editing.
  %src.pinned = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
  %a.pinned = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
  %sad = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src.pinned, i32 %a.pinned, <4 x i32> %b) #0
  ; The {v[2:5]} input constraint forces the intrinsic result into v[2:5].
  %result = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %sad) #0
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; Third intrinsic operand is the constant vector <1065353216, 0, 0, 0>;
; 1065353216 is 0x3F800000, the IEEE-754 single-precision encoding of 1.0.
;
; The body pins the i64 operand to v[2:3], the i32 operand to v4 and the result
; to v[2:5] via inline asm constraints. The expected output copies the i64
; operand to v[0:1] and uses v6 for the i32 operand, keeping every register
; source of v_mqsad_u32_u8 outside the v[2:5] destination; the last operand may
; be any VGPR range.
; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
  ; NOTE(review): "v_lsrlrev_b64" does not look like a real mnemonic (maybe
  ; v_lshlrev_b64 / v_lshrrev_b64 was intended). Nothing in the checks depends
  ; on this text, so it is left as written -- confirm before changing.
  %src.pinned = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
  %a.pinned = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
  %sad = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src.pinned, i32 %a.pinned, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0
  ; Feeding the result to an asm with a {v[2:5]} input pins it to v[2:5].
  %result = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %sad) #0
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; Third intrinsic operand is loaded from global memory through %input.
; NOTE(review): the test name suggests the intent is to cover an operand that
; does not originate in VGPRs -- confirm; the check itself only requires the
; last operand of v_mqsad_u32_u8 to be some VGPR range.
;
; The inline asm constraints place the i64 operand in v[2:3], the i32 operand
; in v4 and demand the result in v[2:5]. The checks expect the i64 operand to be
; copied to v[0:1] and the i32 operand to be read from v6, so no register
; source overlaps the destination range.
; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) {
  %acc = load <4 x i32>, <4 x i32> addrspace(1)* %input
  ; NOTE(review): "v_lsrlrev_b64" appears to be a misspelt shift mnemonic; it
  ; is never matched by a check line and is therefore kept exactly as it was
  ; -- confirm before changing.
  %src.pinned = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
  %a.pinned = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
  %sad = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src.pinned, i32 %a.pinned, <4 x i32> %acc) #0
  ; The {v[2:5]} input constraint is what ties the result to v[2:5].
  %result = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %sad) #0
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; Attribute group #0 is attached to the intrinsic declaration and to every call
; in this file, including the inline asm calls.
attributes #0 = { nounwind readnone }
AMDGPU : Add LLVM intrinsics for SAD related instructions. Differential Revision: http://reviews.llvm.org/D23133 llvm-svn: 278354 2016-08-12 00:33:53 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN %s`
			`; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN %s`

AMDGPU : Fix mqsad_u32_u8 instruction incorrect data type. Differential Revision: http://reviews.llvm.org/D23700 llvm-svn: 281081 2016-09-10 03:31:51 +08:00			`declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0`
AMDGPU : Add LLVM intrinsics for SAD related instructions. Differential Revision: http://reviews.llvm.org/D23133 llvm-svn: 278354 2016-08-12 00:33:53 +08:00
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:`
			`; GCN-DAG: v_mov_b32_e32 v0, v2`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`; GCN-DAG: v_mov_b32_e32 v1, v3`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]`
			`define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`%tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0`
			`%tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`%tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`%tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4`
AMDGPU : Add LLVM intrinsics for SAD related instructions. Differential Revision: http://reviews.llvm.org/D23133 llvm-svn: 278354 2016-08-12 00:33:53 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate:`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`; GCN-DAG: v_mov_b32_e32 v0, v2`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`; GCN-DAG: v_mov_b32_e32 v1, v3`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) {`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`%tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0`
			`%tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`%tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`%tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4`
AMDGPU : Fix mqsad_u32_u8 instruction incorrect data type. Differential Revision: http://reviews.llvm.org/D23700 llvm-svn: 281081 2016-09-10 03:31:51 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate:`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`; GCN-DAG: v_mov_b32_e32 v0, v2`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`; GCN-DAG: v_mov_b32_e32 v1, v3`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`%tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0`
			`%tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`%tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`%tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4`
AMDGPU : Fix mqsad_u32_u8 instruction incorrect data type. Differential Revision: http://reviews.llvm.org/D23700 llvm-svn: 281081 2016-09-10 03:31:51 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr:`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`; GCN-DAG: v_mov_b32_e32 v0, v2`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`; GCN-DAG: v_mov_b32_e32 v1, v3`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) {`
AMDGPU : Fix mqsad_u32_u8 instruction incorrect data type. Differential Revision: http://reviews.llvm.org/D23700 llvm-svn: 281081 2016-09-10 03:31:51 +08:00			`%in = load <4 x i32>, <4 x i32> addrspace(1) * %input`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`%tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0`
			`%tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`%tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0`
AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`%tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0`
[AMDGPU] Force qsads instrs to use different dest register than source registers The V_MQSAD_PK_U16_U8, V_QSAD_PK_U16_U8, and V_MQSAD_U32_U8 take more than 1 pass in hardware. For these three instructions, the destination registers must be different than all sources, so that the first pass does not overwrite sources for the following passes. Differential Revision: https://reviews.llvm.org/D33783 llvm-svn: 304998 2017-06-09 02:21:19 +08:00			`store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4`
AMDGPU : Add LLVM intrinsics for SAD related instructions. Differential Revision: http://reviews.llvm.org/D23133 llvm-svn: 278354 2016-08-12 00:33:53 +08:00			`ret void`
			`}`

AMDGPU: Use correct register names in inline assembly Fixes using physical registers in inline asm from clang. llvm-svn: 305004 2017-06-09 03:03:20 +08:00			`attributes #0 = { nounwind readnone }`