llvm-project/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll

; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; This particular case will actually be worse in terms of code size
; from sinking the shift into both successor blocks.

; OPT-LABEL: @sink_ubfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret


; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc1

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
; GCN: BB0_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008

; GCN: BB0_3:
; GCN: buffer_store_dword
; GCN: s_endpgm
; Kernel under test for the i32 logical-shift case. The shift by 8 is computed
; once in %entry and its only users are the two masks in %bb0 and %bb1. The
; IR-level checks above expect %entry to hold nothing but the branch afterwards,
; with a copy of the shift placed directly before each mask.
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = lshr i32 %arg1, 8
  ; The condition is undef, so no particular successor is implied.
  br i1 undef, label %bb0, label %bb1

bb0:
  ; 255 keeps the low 8 bits of the shifted value, i.e. bits [15:8] of %arg1.
  %val0 = and i32 %shr, 255
  ; NOTE(review): the volatile store presumably exists only to give this block
  ; a side effect so it is not merged with its sibling - confirm.
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  ; 127 keeps the low 7 bits of the shifted value, i.e. bits [14:8] of %arg1.
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  ; Merge the two extracted fields and write the selected one to %out.
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_sbfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = ashr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = ashr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_sbfe_i32:
; Same shape as the logical-shift i32 kernel, but the shared shift is an
; arithmetic shift right (ashr) by 8. The IR-level checks above expect a copy
; of the ashr in front of each mask; for the llc runs only the function label
; is checked, no instructions.
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = ashr i32 %arg1, 8
  ; The condition is undef, so no particular successor is implied.
  br i1 undef, label %bb0, label %bb1

bb0:
  ; 255 keeps the low 8 bits of the shifted value.
  %val0 = and i32 %shr, 255
  ; NOTE(review): the volatile store presumably exists only to give this block
  ; a side effect so it is not merged with its sibling - confirm.
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  ; 127 keeps the low 7 bits of the shifted value.
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  ; Merge the two masked values and write the selected one to %out.
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}


; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i16 %arg1, 4
; OPT-NEXT: %val0 = and i16 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i16 %arg1, 4
; OPT-NEXT: %val1 = and i16 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; For GFX8: since i16 is a legal type, we cannot sink lshr into BBs.

; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc1

; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff

; GCN: BB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f

; GCN: BB2_3:
; GCN: buffer_store_short
; GCN: s_endpgm
; i16 variant: a logical shift right by 4 shared by two masks. The IR-level
; checks above expect a copy of the shift before each mask. The ISA-level
; checks differ per llc run: the default-target run expects an s_bfe_u32 in
; each branch (0x80004 / 0x70004), while the tonga run expects one s_bfe_u32
; (0xc0004) ahead of the branch followed by an s_and_b32 (0xff / 0x7f) in each
; branch.
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
  %shr = lshr i16 %arg1, 4
  ; The condition is undef, so no particular successor is implied.
  br i1 undef, label %bb0, label %bb1

bb0:
  ; 255 keeps the low 8 bits of the shifted value, i.e. bits [11:4] of %arg1.
  %val0 = and i16 %shr, 255
  ; NOTE(review): the volatile store presumably exists only to give this block
  ; a side effect so it is not merged with its sibling - confirm.
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

bb1:
  ; 127 keeps the low 7 bits of the shifted value, i.e. bits [10:4] of %arg1.
  %val1 = and i16 %shr, 127
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

ret:
  ; Merge the two extracted fields and write the selected one to %out.
  %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i16 %phi, i16 addrspace(1)* %out
  ret void
}

; We don't really want to sink this one since it isn't reducible to a
; 32-bit BFE on one half of the integer.

; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 30
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 30
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:

; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: s_cbranch_scc1 BB3_2
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: BB3_2:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]

; GCN: BB3_3:
; GCN: buffer_store_dwordx2
; i64 variant where the extracted field straddles bit 32: the shift amount is
; 30, so the 8-bit mask selects bits [37:30] and the 7-bit mask bits [36:30] of
; %arg1. The IR-level checks above still expect a copy of the shift before each
; mask; the ISA-level checks expect a single v_alignbit_b32 by 30 ahead of the
; branch and a v_and_b32 (0xff / 0x7f) in each branch.
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 30
  ; The condition is undef, so no particular successor is implied.
  br i1 undef, label %bb0, label %bb1

bb0:
  ; 255 keeps the low 8 bits of the shifted value.
  %val0 = and i64 %shr, 255
  ; NOTE(review): the volatile store presumably exists only to give this block
  ; a side effect so it is not merged with its sibling - confirm.
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  ; 127 keeps the low 7 bits of the shifted value.
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  ; Merge the two extracted fields and write the selected one to %out.
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_low32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 15
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 15
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_low32:

; GCN: s_cbranch_scc1 BB4_2

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: BB4_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f

; GCN: BB4_3:
; GCN: buffer_store_dwordx2
; i64 variant where the extracted field lies entirely in the low 32 bits: the
; shift amount is 15, so the masks select bits [22:15] and [21:15] of %arg1.
; The IR-level checks above expect a copy of the shift before each mask; the
; ISA-level checks expect a 32-bit s_bfe_u32 in each branch (0x8000f / 0x7000f).
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 15
  ; The condition is undef, so no particular successor is implied.
  br i1 undef, label %bb0, label %bb1

bb0:
  ; 255 keeps the low 8 bits of the shifted value.
  %val0 = and i64 %shr, 255
  ; NOTE(review): the volatile store presumably exists only to give this block
  ; a side effect so it is not merged with its sibling - confirm.
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  ; 127 keeps the low 7 bits of the shifted value.
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  ; Merge the two extracted fields and write the selected one to %out.
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_high32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 35
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 35
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc1 BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: BB5_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003

; GCN: BB5_3:
; GCN: buffer_store_dwordx2
; i64 variant where the extracted field lies entirely in the high 32 bits: the
; shift amount is 35, so the masks select bits [42:35] and [41:35] of %arg1.
; The IR-level checks above expect a copy of the shift before each mask; the
; ISA-level checks expect a 32-bit s_bfe_u32 in each branch (0x80003 / 0x70003),
; whose low immediate digit matches 35 - 32 = 3.
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 35
  ; The condition is undef, so no particular successor is implied.
  br i1 undef, label %bb0, label %bb1

bb0:
  ; 255 keeps the low 8 bits of the shifted value.
  %val0 = and i64 %shr, 255
  ; NOTE(review): the volatile store presumably exists only to give this block
  ; a side effect so it is not merged with its sibling - confirm.
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  ; 127 keeps the low 7 bits of the shifted value.
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  ; Merge the two extracted fields and write the selected one to %out.
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s \| FileCheck -check-prefix=OPT %s`
Enable FeatureFlatForGlobal on Volcanic Islands This switches to the workaround that HSA defaults to for the mesa path. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> llvm-svn: 292982 2017-01-25 06:02:15 +08:00			`; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s \| FileCheck -check-prefix=OPT %s`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=SI %s`
Enable FeatureFlatForGlobal on Volcanic Islands This switches to the workaround that HSA defaults to for the mesa path. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net> llvm-svn: 292982 2017-01-25 06:02:15 +08:00			`; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN -check-prefix=VI %s`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
			`; This particular case will actually be worse in terms of code size`
			`; from sinking into both.`

			`; OPT-LABEL: @sink_ubfe_i32(`
			`; OPT: entry:`
			`; OPT-NEXT: br i1`

			`; OPT: bb0:`
			`; OPT: %0 = lshr i32 %arg1, 8`
			`; OPT-NEXT: %val0 = and i32 %0, 255`
			`; OPT: br label`

			`; OPT: bb1:`
			`; OPT: %1 = lshr i32 %arg1, 8`
			`; OPT-NEXT: %val1 = and i32 %1, 127`
			`; OPT: br label`

			`; OPT: ret:`
			`; OPT: store`
			`; OPT: ret`


			`; GCN-LABEL: {{^}}sink_ubfe_i32:`
			`; GCN-NOT: lshr`
AMDGPU: Select branch on undef to uniform scc branch llvm-svn: 289877 2016-12-16 05:57:11 +08:00			`; GCN: s_cbranch_scc1`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
			`; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008`
			`; GCN: BB0_2:`
			`; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008`

			`; GCN: BB0_3:`
			`; GCN: buffer_store_dword`
			`; GCN: s_endpgm`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`entry:`
			`%shr = lshr i32 %arg1, 8`
			`br i1 undef, label %bb0, label %bb1`

			`bb0:`
			`%val0 = and i32 %shr, 255`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`bb1:`
			`%val1 = and i32 %shr, 127`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`ret:`
			`%phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]`
			`store i32 %phi, i32 addrspace(1)* %out`
			`ret void`
			`}`

			`; OPT-LABEL: @sink_sbfe_i32(`
			`; OPT: entry:`
			`; OPT-NEXT: br i1`

			`; OPT: bb0:`
			`; OPT: %0 = ashr i32 %arg1, 8`
			`; OPT-NEXT: %val0 = and i32 %0, 255`
			`; OPT: br label`

			`; OPT: bb1:`
			`; OPT: %1 = ashr i32 %arg1, 8`
			`; OPT-NEXT: %val1 = and i32 %1, 127`
			`; OPT: br label`

			`; OPT: ret:`
			`; OPT: store`
			`; OPT: ret`

			`; GCN-LABEL: {{^}}sink_sbfe_i32:`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`entry:`
			`%shr = ashr i32 %arg1, 8`
			`br i1 undef, label %bb0, label %bb1`

			`bb0:`
			`%val0 = and i32 %shr, 255`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`bb1:`
			`%val1 = and i32 %shr, 127`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`ret:`
			`%phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]`
			`store i32 %phi, i32 addrspace(1)* %out`
			`ret void`
			`}`


			`; OPT-LABEL: @sink_ubfe_i16(`
			`; OPT: entry:`
			`; OPT-NEXT: br i1`

			`; OPT: bb0:`
			`; OPT: %0 = lshr i16 %arg1, 4`
			`; OPT-NEXT: %val0 = and i16 %0, 255`
			`; OPT: br label`

			`; OPT: bb1:`
			`; OPT: %1 = lshr i16 %arg1, 4`
			`; OPT-NEXT: %val1 = and i16 %1, 127`
			`; OPT: br label`

			`; OPT: ret:`
			`; OPT: store`
			`; OPT: ret`

AMDGPU: Add VI i16 support Patch By: Wei Ding Differential Revision: https://reviews.llvm.org/D18049 llvm-svn: 286464 2016-11-11 00:02:37 +08:00			`; For GFX8: since i16 is legal type, we cannot sink lshr into BBs.`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
			`; GCN-LABEL: {{^}}sink_ubfe_i16:`
			`; GCN-NOT: lshr`
[AMDGPU] Remove getBidirectionalReasonRank This method inverts the Reason field of a scheduling candidate. It does right comparison between RegCritical and RegExcess, but everything else is broken. In fact it can prefer less strong reason such as Weak over RegCritical because Weak > -RegCritical. The CandReason enum is properly sorted, so just remove artificial ranking. Differential Revision: https://reviews.llvm.org/D30557 llvm-svn: 297536 2017-03-11 08:29:27 +08:00			`; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c`
			`; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004`
AMDGPU: Select branch on undef to uniform scc branch llvm-svn: 289877 2016-12-16 05:57:11 +08:00			`; GCN: s_cbranch_scc1`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
AMDGPU: Add VI i16 support Patch By: Wei Ding Differential Revision: https://reviews.llvm.org/D18049 llvm-svn: 286464 2016-11-11 00:02:37 +08:00			`; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004`
[AMDGPU] Remove getBidirectionalReasonRank This method inverts the Reason field of a scheduling candidate. It does right comparison between RegCritical and RegExcess, but everything else is broken. In fact it can prefer less strong reason such as Weak over RegCritical because Weak > -RegCritical. The CandReason enum is properly sorted, so just remove artificial ranking. Differential Revision: https://reviews.llvm.org/D30557 llvm-svn: 297536 2017-03-11 08:29:27 +08:00			`; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff`
AMDGPU: Add VI i16 support Patch By: Wei Ding Differential Revision: https://reviews.llvm.org/D18049 llvm-svn: 286464 2016-11-11 00:02:37 +08:00
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`; GCN: BB2_2:`
AMDGPU: Add VI i16 support Patch By: Wei Ding Differential Revision: https://reviews.llvm.org/D18049 llvm-svn: 286464 2016-11-11 00:02:37 +08:00			`; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004`
[AMDGPU] Remove getBidirectionalReasonRank This method inverts the Reason field of a scheduling candidate. It does right comparison between RegCritical and RegExcess, but everything else is broken. In fact it can prefer less strong reason such as Weak over RegCritical because Weak > -RegCritical. The CandReason enum is properly sorted, so just remove artificial ranking. Differential Revision: https://reviews.llvm.org/D30557 llvm-svn: 297536 2017-03-11 08:29:27 +08:00			`; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
			`; GCN: BB2_3:`
			`; GCN: buffer_store_short`
			`; GCN: s_endpgm`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`entry:`
			`%shr = lshr i16 %arg1, 4`
			`br i1 undef, label %bb0, label %bb1`

			`bb0:`
			`%val0 = and i16 %shr, 255`
			`store volatile i16 0, i16 addrspace(1)* undef`
			`br label %ret`

			`bb1:`
			`%val1 = and i16 %shr, 127`
			`store volatile i16 0, i16 addrspace(1)* undef`
			`br label %ret`

			`ret:`
			`%phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]`
			`store i16 %phi, i16 addrspace(1)* %out`
			`ret void`
			`}`

			`; We don't really want to sink this one since it isn't reducible to a`
			`; 32-bit BFE on one half of the integer.`

			`; OPT-LABEL: @sink_ubfe_i64_span_midpoint(`
			`; OPT: entry:`
			`; OPT-NOT: lshr`
			`; OPT: br i1`

			`; OPT: bb0:`
			`; OPT: %0 = lshr i64 %arg1, 30`
			`; OPT-NEXT: %val0 = and i64 %0, 255`

			`; OPT: bb1:`
			`; OPT: %1 = lshr i64 %arg1, 30`
			`; OPT-NEXT: %val1 = and i64 %1, 127`

			`; OPT: ret:`
			`; OPT: store`
			`; OPT: ret`

			`; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:`

[AMDGPU] Add pattern for v_alignbit_b32 with immediate If immediate in shift is less than 32 we can use alignbit too. Differential Revision: https://reviews.llvm.org/D34729 llvm-svn: 306500 2017-06-28 10:52:39 +08:00			`; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30`
			`; GCN: s_cbranch_scc1 BB3_2`
			`; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
			`; GCN: BB3_2:`
[AMDGPU] Add pattern for v_alignbit_b32 with immediate If immediate in shift is less than 32 we can use alignbit too. Differential Revision: https://reviews.llvm.org/D34729 llvm-svn: 306500 2017-06-28 10:52:39 +08:00			`; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
			`; GCN: BB3_3:`
			`; GCN: buffer_store_dwordx2`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`entry:`
			`%shr = lshr i64 %arg1, 30`
			`br i1 undef, label %bb0, label %bb1`

			`bb0:`
			`%val0 = and i64 %shr, 255`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`bb1:`
			`%val1 = and i64 %shr, 127`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`ret:`
			`%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]`
			`store i64 %phi, i64 addrspace(1)* %out`
			`ret void`
			`}`

			`; OPT-LABEL: @sink_ubfe_i64_low32(`
			`; OPT: entry:`
			`; OPT-NOT: lshr`
			`; OPT: br i1`

			`; OPT: bb0:`
			`; OPT: %0 = lshr i64 %arg1, 15`
			`; OPT-NEXT: %val0 = and i64 %0, 255`

			`; OPT: bb1:`
			`; OPT: %1 = lshr i64 %arg1, 15`
			`; OPT-NEXT: %val1 = and i64 %1, 127`

			`; OPT: ret:`
			`; OPT: store`
			`; OPT: ret`

			`; GCN-LABEL: {{^}}sink_ubfe_i64_low32:`

AMDGPU: Select branch on undef to uniform scc branch llvm-svn: 289877 2016-12-16 05:57:11 +08:00			`; GCN: s_cbranch_scc1 BB4_2`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
DAGCombiner: Reduce 64-bit BFE pattern to pattern on 32-bit component If the extracted bits are restricted to the upper half or lower half, this can be truncated. llvm-svn: 267024 2016-04-22 02:03:06 +08:00			`; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
			`; GCN: BB4_2:`
DAGCombiner: Reduce 64-bit BFE pattern to pattern on 32-bit component If the extracted bits are restricted to the upper half or lower half, this can be truncated. llvm-svn: 267024 2016-04-22 02:03:06 +08:00			`; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00
			`; GCN: BB4_3:`
			`; GCN: buffer_store_dwordx2`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`entry:`
			`%shr = lshr i64 %arg1, 15`
			`br i1 undef, label %bb0, label %bb1`

			`bb0:`
			`%val0 = and i64 %shr, 255`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`bb1:`
			`%val1 = and i64 %shr, 127`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`ret:`
			`%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]`
			`store i64 %phi, i64 addrspace(1)* %out`
			`ret void`
			`}`

			`; OPT-LABEL: @sink_ubfe_i64_high32(`
			`; OPT: entry:`
			`; OPT-NOT: lshr`
			`; OPT: br i1`

			`; OPT: bb0:`
			`; OPT: %0 = lshr i64 %arg1, 35`
			`; OPT-NEXT: %val0 = and i64 %0, 255`

			`; OPT: bb1:`
			`; OPT: %1 = lshr i64 %arg1, 35`
			`; OPT-NEXT: %val1 = and i64 %1, 127`

			`; OPT: ret:`
			`; OPT: store`
			`; OPT: ret`

			`; GCN-LABEL: {{^}}sink_ubfe_i64_high32:`
AMDGPU: Select branch on undef to uniform scc branch llvm-svn: 289877 2016-12-16 05:57:11 +08:00			`; GCN: s_cbranch_scc1 BB5_2`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003`

			`; GCN: BB5_2:`
			`; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003`

			`; GCN: BB5_3:`
			`; GCN: buffer_store_dwordx2`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {`
AMDGPU: Set HasExtractBitInsn This currently does not have the control over the bitwidth, and there are missing optimizations to reduce the integer to 32-bit if it can be. But in most situations we do want the sinking to occur. llvm-svn: 262296 2016-03-01 12:58:17 +08:00			`entry:`
			`%shr = lshr i64 %arg1, 35`
			`br i1 undef, label %bb0, label %bb1`

			`bb0:`
			`%val0 = and i64 %shr, 255`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`bb1:`
			`%val1 = and i64 %shr, 127`
			`store volatile i32 0, i32 addrspace(1)* undef`
			`br label %ret`

			`ret:`
			`%phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]`
			`store i64 %phi, i64 addrspace(1)* %out`
			`ret void`
			`}`

			`attributes #0 = { nounwind }`