; llvm-project/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll

; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; An i64 global load with only 4-byte alignment feeds an 'and' whose mask
; (1234567 == 0x12d687) fits in 32 bits. The checks below expect a single
; dword load, a 32-bit 'and' against that constant, and a dwordx2 store of
; the 64-bit result.
; GCN-LABEL: {{^}}reduce_i64_load_align_4_width_to_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, [[VAL]]
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @reduce_i64_load_align_4_width_to_i32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
  ; Under-aligned (align 4) 64-bit load from the input pointer.
  %wide = load i64, i64 addrspace(1)* %in, align 4
  ; Mask with a constant that has no bits set above bit 31.
  %masked = and i64 %wide, 1234567
  ; The full 64-bit result is still written back, naturally aligned.
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}

; An i64 global load with only 4-byte alignment is reinterpreted as <2 x i32>
; and only element 0 is used. The checks below expect a single dword load
; whose result register is stored directly.
; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt0:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: buffer_store_dword [[VAL]]
define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt0(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
  ; Under-aligned (align 4) 64-bit load from the input pointer.
  %wide = load i64, i64 addrspace(1)* %in, align 4
  ; View the 64-bit value as two 32-bit lanes and keep lane 0 only.
  %lanes = bitcast i64 %wide to <2 x i32>
  %lane0 = extractelement <2 x i32> %lanes, i32 0
  store i32 %lane0, i32 addrspace(1)* %out
  ret void
}

; Same shape as the element-0 case, but element 1 of the <2 x i32> view is the
; one that is used. The checks below expect a single dword load carrying an
; immediate 'offset:4', with the loaded register stored directly.
; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
; GCN: buffer_store_dword [[VAL]]
define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
  ; Under-aligned (align 4) 64-bit load from the input pointer.
  %wide = load i64, i64 addrspace(1)* %in, align 4
  ; View the 64-bit value as two 32-bit lanes and keep lane 1 only.
  %lanes = bitcast i64 %wide to <2 x i32>
  %lane1 = extractelement <2 x i32> %lanes, i32 1
  store i32 %lane1, i32 addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by the kernel definitions in this file;
; 'nounwind' marks those functions as never unwinding.
attributes #0 = { nounwind }
[AMDGPU] Switch scalarize global loads ON by default Differential revision: https://reviews.llvm.org/D34407 llvm-svn: 307097 2017-07-05 01:32:00 +08:00			`; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI %s`
			`; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=CI %s`
			`; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI %s`
DAGCombiner: Relax alignment restriction when changing load type If the target allows the alignment, this should still be OK. llvm-svn: 267209 2016-04-23 04:21:36 +08:00
			`; GCN-LABEL: {{^}}reduce_i64_load_align_4_width_to_i32:`
			`; GCN: buffer_load_dword [[VAL:v[0-9]+]]`
			`; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, [[VAL]]`
			`; GCN: buffer_store_dwordx2`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @reduce_i64_load_align_4_width_to_i32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {`
DAGCombiner: Relax alignment restriction when changing load type If the target allows the alignment, this should still be OK. llvm-svn: 267209 2016-04-23 04:21:36 +08:00			`%a = load i64, i64 addrspace(1)* %in, align 4`
			`%and = and i64 %a, 1234567`
			`store i64 %and, i64 addrspace(1)* %out, align 8`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt0:`
			`; GCN: buffer_load_dword [[VAL:v[0-9]+]]`
			`; GCN: buffer_store_dword [[VAL]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt0(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {`
DAGCombiner: Relax alignment restriction when changing load type If the target allows the alignment, this should still be OK. llvm-svn: 267209 2016-04-23 04:21:36 +08:00			`%a = load i64, i64 addrspace(1)* %in, align 4`
			`%vec = bitcast i64 %a to <2 x i32>`
			`%elt0 = extractelement <2 x i32> %vec, i32 0`
			`store i32 %elt0, i32 addrspace(1)* %out`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1:`
AMDGPU/SI: Assembler: Unify parsing/printing of operands. Summary: The goal is for each operand type to have its own parse function and at the same time share common code for tracking state as different instruction types share operand types (e.g. glc/glc_flat, etc). Introduce parseAMDGPUOperand which can parse any optional operand. DPP and Clamp/OMod have custom handling for now. Sam also suggested to have class hierarchy for operand types instead of table. This can be done in separate change. Remove parseVOP3OptionalOps, parseDS*OptionalOps, parseFlatOptionalOps, parseMubufOptionalOps, parseDPPOptionalOps. Reduce number of definitions of AsmOperand's and MatchClasses' by using common base class. Rename AsmMatcher/InstPrinter methods accordingly. Print immediate type when printing parsed immediate operand. Use 'off' if offset/index register is unused instead of skipping it to make it more readable (also agreed with SP3). Update tests. Reviewers: tstellarAMD, SamWot, artem.tamazov Subscribers: qcolombet, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D19584 llvm-svn: 268015 2016-04-29 17:02:30 +08:00			`; GCN: buffer_load_dword [[VAL:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4`
DAGCombiner: Relax alignment restriction when changing load type If the target allows the alignment, this should still be OK. llvm-svn: 267209 2016-04-23 04:21:36 +08:00			`; GCN: buffer_store_dword [[VAL]]`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {`
DAGCombiner: Relax alignment restriction when changing load type If the target allows the alignment, this should still be OK. llvm-svn: 267209 2016-04-23 04:21:36 +08:00			`%a = load i64, i64 addrspace(1)* %in, align 4`
			`%vec = bitcast i64 %a to <2 x i32>`
			`%elt0 = extractelement <2 x i32> %vec, i32 1`
			`store i32 %elt0, i32 addrspace(1)* %out`
			`ret void`
			`}`

			`attributes #0 = { nounwind }`