llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll

; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s

declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0

; CHECK-LABEL: {{^}}ds_bpermute:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: {{^}}ds_bpermute_imm_offset:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
define amdgpu_kernel void @ds_bpermute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
  %index = add i32 %base_index, 4
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: {{^}}ds_bpermute_imm_index:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:64
define amdgpu_kernel void @ds_bpermute_imm_index(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 64, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: {{^}}ds_bpermute_add_shl:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
; CHECK: s_waitcnt lgkmcnt
define void @ds_bpermute_add_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
  %index = add i32 %base_index, 1
  %byte_index = shl i32 %index, 2
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: {{^}}ds_bpermute_or_shl:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
; CHECK: s_waitcnt lgkmcnt
define void @ds_bpermute_or_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
  %masked = and i32 %base_index, 62
  %index = or i32 %masked, 1
  %byte_index = shl i32 %index, 2
  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0
  store i32 %bpermute, i32 addrspace(1)* %out, align 4
  ret void
}

attributes #0 = { nounwind readnone convergent }
AMDGPU/SI: Implement DS_PERMUTE/DS_BPERMUTE Instruction Definitions and Intrinsics Summary: This patch impleemnts DS_PERMUTE/DS_BPERMUTE instruction definitions and intrinsics, which are new since VI. Reviewers: tstellarAMD, arsenm Subscribers: llvm-commits, arsenm Differential Revision: http://reviews.llvm.org/D17614 llvm-svn: 262356 2016-03-02 01:51:23 +08:00			`; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s \| FileCheck %s`

			`declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0`

AMDGPU: Fix some incorrect FUNC-LABEL checks 2020-02-26 17:41:41 +08:00			`; CHECK-LABEL: {{^}}ds_bpermute:`
AMDGPU/SI: Implement DS_PERMUTE/DS_BPERMUTE Instruction Definitions and Intrinsics Summary: This patch impleemnts DS_PERMUTE/DS_BPERMUTE instruction definitions and intrinsics, which are new since VI. Reviewers: tstellarAMD, arsenm Subscribers: llvm-commits, arsenm Differential Revision: http://reviews.llvm.org/D17614 llvm-svn: 262356 2016-03-02 01:51:23 +08:00			`; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {`
AMDGPU/SI: Implement DS_PERMUTE/DS_BPERMUTE Instruction Definitions and Intrinsics Summary: This patch impleemnts DS_PERMUTE/DS_BPERMUTE instruction definitions and intrinsics, which are new since VI. Reviewers: tstellarAMD, arsenm Subscribers: llvm-commits, arsenm Differential Revision: http://reviews.llvm.org/D17614 llvm-svn: 262356 2016-03-02 01:51:23 +08:00			`%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0`
			`store i32 %bpermute, i32 addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU/SI: Add offset field to ds_permute/ds_bpermute instructions Summary: These instructions can add an immediate offset to the address, like other ds instructions. Reviewers: arsenm Subscribers: arsenm, scchan Differential Revision: http://reviews.llvm.org/D19233 llvm-svn: 268043 2016-04-29 22:34:26 +08:00			`; CHECK-LABEL: {{^}}ds_bpermute_imm_offset:`
			`; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @ds_bpermute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {`
AMDGPU/SI: Add offset field to ds_permute/ds_bpermute instructions Summary: These instructions can add an immediate offset to the address, like other ds instructions. Reviewers: arsenm Subscribers: arsenm, scchan Differential Revision: http://reviews.llvm.org/D19233 llvm-svn: 268043 2016-04-29 22:34:26 +08:00			`%index = add i32 %base_index, 4`
			`%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0`
			`store i32 %bpermute, i32 addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU/SI: Make sure to emit TargetConstant nodes when matching ds_permute Summary: This fixes a bug with ds_permute instructions where if it was passed a constant address, then the offset operand would get assigned a register operand instead of an immediate. Reviewers: scchan, arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D19994 llvm-svn: 272349 2016-06-10 08:01:04 +08:00			`; CHECK-LABEL: {{^}}ds_bpermute_imm_index:`
			`; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:64`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @ds_bpermute_imm_index(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {`
AMDGPU/SI: Make sure to emit TargetConstant nodes when matching ds_permute Summary: This fixes a bug with ds_permute instructions where if it was passed a constant address, then the offset operand would get assigned a register operand instead of an immediate. Reviewers: scchan, arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D19994 llvm-svn: 272349 2016-06-10 08:01:04 +08:00			`%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 64, i32 %src) #0`
			`store i32 %bpermute, i32 addrspace(1)* %out, align 4`
			`ret void`
			`}`

[DAGCombine] (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) We already have a combine for this pattern when the input to shl is add, so we just need to enable the transformation when the input is or. Original patch by @tstellar Differential Revision: https://reviews.llvm.org/D19325 llvm-svn: 313251 2017-09-14 18:38:30 +08:00			`; CHECK-LABEL: {{^}}ds_bpermute_add_shl:`
			`; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4`
			`; CHECK: s_waitcnt lgkmcnt`
			`define void @ds_bpermute_add_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {`
			`%index = add i32 %base_index, 1`
			`%byte_index = shl i32 %index, 2`
			`%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0`
			`store i32 %bpermute, i32 addrspace(1)* %out, align 4`
			`ret void`
			`}`

			`; CHECK-LABEL: {{^}}ds_bpermute_or_shl:`
			`; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4`
			`; CHECK: s_waitcnt lgkmcnt`
			`define void @ds_bpermute_or_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {`
			`%masked = and i32 %base_index, 62`
			`%index = or i32 %masked, 1`
			`%byte_index = shl i32 %index, 2`
			`%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0`
			`store i32 %bpermute, i32 addrspace(1)* %out, align 4`
			`ret void`
			`}`

AMDGPU/SI: Implement DS_PERMUTE/DS_BPERMUTE Instruction Definitions and Intrinsics Summary: This patch impleemnts DS_PERMUTE/DS_BPERMUTE instruction definitions and intrinsics, which are new since VI. Reviewers: tstellarAMD, arsenm Subscribers: llvm-commits, arsenm Differential Revision: http://reviews.llvm.org/D17614 llvm-svn: 262356 2016-03-02 01:51:23 +08:00			`attributes #0 = { nounwind readnone convergent }`