; llvm-project/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx902  -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s

; add1 -- add of a zero-extended compare result.
; The kernel computes  arg[x] = arg[x] + zext(x >u y).
; The checks for the run without -mcpu capture the register written by the
; unsigned compare (as CC) and require that very register to be the carry-in
; of an add-with-carry whose other addend is the literal 0; no v_cndmask may
; appear afterwards, i.e. the i1 must not be turned into a 0/1 value first.
; The gfx902 checks only require the _co spelling of the carry add with vcc
; right after the destination register.
; GCN-LABEL: {{^}}add1:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}add1:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @add1(i32 addrspace(1)* nocapture %arg) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  ; arg[x] is both the value that is read and the slot that is written back.
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  ; Pattern under test: i1 compare -> zext to i32 -> add to the loaded value.
  %cmp = icmp ugt i32 %x, %y
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; add1_i16 -- the zext(compare) + value pattern with a truncated i16 result.
; Unlike @add1 this is not an amdgpu_kernel: the sum is still formed in i32,
; then truncated to i16 and returned instead of being stored. The %dst
; parameter is not referenced anywhere in the body.
; The expected instructions are the same as for @add1: the compare result
; register (CC) feeds an add-with-carry of 0 directly, with no v_cndmask
; afterwards; the gfx902 checks look for the _co form with vcc.
; GCN-LABEL: {{^}}add1_i16:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}add1_i16:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define i16 @add1_i16(i32 addrspace(1)* nocapture %arg, i16 addrspace(1)* nocapture %dst) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  ; Pattern under test, done entirely in i32.
  %cmp = icmp ugt i32 %x, %y
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %ext
  ; Narrowing happens only after the add.
  %trunc = trunc i32 %add to i16
  ret i16 %trunc
}

; sub1 -- add of a sign-extended compare result.
; sext of an i1 yields 0 or -1, so  v + sext(x >u y)  subtracts the compare
; bit from v. The checks for the run without -mcpu require the compare to
; write vcc in the e32 encoding and a v_subbrev_u32_e32 with the literal 0
; operand and vcc as borrow-in, with no v_cndmask afterwards. The gfx902
; checks accept either encoding of the _co form with vcc after the
; destination register.
; GCN-LABEL: {{^}}sub1:
; GCN: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}sub1:
; GFX9: v_subbrev_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @sub1(i32 addrspace(1)* nocapture %arg) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  ; Pattern under test: i1 compare -> sext to i32 -> add (value is named %add
  ; although the effect is a conditional decrement).
  %cmp = icmp ugt i32 %x, %y
  %ext = sext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; add_adde -- a plain add applied AFTER the zext(compare) add.
; The kernel computes  (v + zext(x >u y)) + a.  The checks for the run
; without -mcpu require a single add-with-carry that takes two vector
; registers as addends (no literal 0) and the compare result register (CC)
; as carry-in. After it, neither v_cndmask nor any text matching v_add may
; appear before the next label, so a separate add is not accepted.
; The gfx902 checks look for the _co form with vcc.
; GCN-LABEL: {{^}}add_adde:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask
; GCN-NOT: v_add

; GFX9-LABEL: {{^}}add_adde:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = zext i1 %cmp to i32
  ; Order matters for this case: the compare bit is added first ...
  %adde = add i32 %v, %ext
  ; ... and the kernel argument %a second.
  %add2 = add i32 %adde, %a
  store i32 %add2, i32 addrspace(1)* %gep, align 4
  ret void
}

; adde_add -- a plain add applied BEFORE the zext(compare) add.
; The kernel computes  (v + a) + zext(x >u y),  i.e. the same sum as
; @add_adde with the two adds in the opposite order. The expected output is
; the same: one add-with-carry over two vector registers with the compare
; result register (CC) as carry-in, followed by neither v_cndmask nor any
; text matching v_add before the next label. The gfx902 checks look for the
; _co form with vcc.
; GCN-LABEL: {{^}}adde_add:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask
; GCN-NOT: v_add

; GFX9-LABEL: {{^}}adde_add:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = zext i1 %cmp to i32
  ; Order matters for this case: the kernel argument %a is added first ...
  %add = add i32 %v, %a
  ; ... and the compare bit second.
  %adde = add i32 %add, %ext
  store i32 %adde, i32 addrspace(1)* %gep, align 4
  ret void
}

; sub_sube -- a plain sub applied AFTER the sext(compare) add.
; The kernel computes  (v + sext(x >u y)) - a;  since sext of an i1 is 0 or
; -1 this equals v - a - (x >u y). The checks for the run without -mcpu
; require a single subtract-with-borrow over two vector registers that uses
; the compare result register (CC) as borrow-in. After it, neither v_cndmask
; nor any text matching v_sub may appear before the next label.
; The gfx902 checks look for the _co form with vcc.
; GCN-LABEL: {{^}}sub_sube:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask
; GCN-NOT: v_sub

; GFX9-LABEL: {{^}}sub_sube:
; GFX9: v_subb_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = sext i1 %cmp to i32
  ; Order matters for this case: the sign-extended compare is added first ...
  %adde = add i32 %v, %ext
  ; ... and %a is subtracted second.
  %sub = sub i32 %adde, %a
  store i32 %sub, i32 addrspace(1)* %gep, align 4
  ret void
}

; sube_sub -- a plain sub applied BEFORE the sext(compare) add.
; The kernel computes  (v - a) + sext(x >u y),  the same value as @sub_sube
; with the two operations in the opposite order. The expected output is the
; same: one subtract-with-borrow over two vector registers with the compare
; result register (CC) as borrow-in, followed by neither v_cndmask nor any
; text matching v_sub before the next label. The gfx902 checks look for the
; _co form with vcc.
; GCN-LABEL: {{^}}sube_sub:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask
; GCN-NOT: v_sub

; GFX9-LABEL: {{^}}sube_sub:
; GFX9: v_subb_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = sext i1 %cmp to i32
  ; Order matters for this case: %a is subtracted first ...
  %sub = sub i32 %v, %a
  ; ... and the sign-extended compare is added second.
  %adde = add i32 %sub, %ext
  store i32 %adde, i32 addrspace(1)* %gep, align 4
  ret void
}

; zext_flclass -- zext(i1) + value where the i1 comes from a float class test.
; The condition is produced by the llvm.amdgcn.class.f32 intrinsic (applied
; to the kernel argument %x with mask 608) rather than by an icmp. The checks
; for the run without -mcpu require the register written by v_cmp_class_f32
; (CC) to be the carry-in of an add-with-carry of the literal 0, with no
; v_cndmask afterwards. The gfx902 checks look for the _co form with vcc.
; GCN-LABEL: {{^}}zext_flclass:
; GCN: v_cmp_class_f32_e{{32|64}} [[CC:[^,]+]],
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}zext_flclass:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @zext_flclass(i32 addrspace(1)* nocapture %arg, float %x) {
bb:
  ; Here %x is the float kernel argument, so the work-item id is named %id.
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
  %v = load i32, i32 addrspace(1)* %gep, align 4
  ; Pattern under test: class-test i1 -> zext to i32 -> add.
  %cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608)
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; sext_flclass -- sext(i1) + value where the i1 comes from a float class test.
; Same condition source as @zext_flclass (llvm.amdgcn.class.f32 on %x with
; mask 608), but the i1 is sign-extended, so adding it subtracts the class
; bit from the loaded value. The checks for the run without -mcpu require
; v_cmp_class_f32 to write vcc in the e32 encoding and a v_subbrev_u32_e32
; with the literal 0 operand and vcc as borrow-in, with no v_cndmask
; afterwards. The gfx902 checks require the e32 _co form with vcc.
; GCN-LABEL: {{^}}sext_flclass:
; GCN: v_cmp_class_f32_e32 vcc,
; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}sext_flclass:
; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc
define amdgpu_kernel void @sext_flclass(i32 addrspace(1)* nocapture %arg, float %x) {
bb:
  ; Here %x is the float kernel argument, so the work-item id is named %id.
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
  %v = load i32, i32 addrspace(1)* %gep, align 4
  ; Pattern under test: class-test i1 -> sext to i32 -> add.
  %cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608)
  %ext = sext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; add_and -- zext(i1) + value where the i1 is the AND of two compares.
; The condition is  (x >u y) & (x >u 1).  The checks for the run without
; -mcpu capture the destination of the scalar s_and_b64 (as CC) and require
; that register to be the carry-in of an add-with-carry of the literal 0,
; with no v_cndmask afterwards. The gfx902 checks look for the _co form with
; vcc.
; GCN-LABEL: {{^}}add_and:
; GCN: s_and_b64 [[CC:[^,]+]],
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}add_and:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  ; Two independent i1 conditions combined with a logical and.
  %cmp1 = icmp ugt i32 %x, %y
  %cmp2 = icmp ugt i32 %x, 1
  %cmp = and i1 %cmp1, %cmp2
  ; Pattern under test: combined i1 -> zext to i32 -> add.
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; Intrinsics called by the test functions in this file. All three
; declarations share attribute group #0, defined at the end.

; Float class test returning an i1; called with a float and an i32 mask.
declare i1 @llvm.amdgcn.class.f32(float, i32) #0

; Work-item id queries; both take no arguments and return an i32.
declare i32 @llvm.amdgcn.workitem.id.x() #0

declare i32 @llvm.amdgcn.workitem.id.y() #0

attributes #0 = { nounwind readnone speculatable }
[AMDGPU] simplify add x, *ext (setcc) => addc\|subb x, 0, setcc This simplification allows to avoid generating v_cndmask_b32 to serialize condition code between compare and use. Differential Revision: https://reviews.llvm.org/D34300 llvm-svn: 305962 2017-06-22 06:05:06 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=GCN %s`
[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s \| FileCheck -check-prefix=GFX9 %s`
[AMDGPU] simplify add x, *ext (setcc) => addc\|subb x, 0, setcc This simplification allows to avoid generating v_cndmask_b32 to serialize condition code between compare and use. Differential Revision: https://reviews.llvm.org/D34300 llvm-svn: 305962 2017-06-22 06:05:06 +08:00
			`; GCN-LABEL: {{^}}add1:`
			`; GCN: v_cmp_gt_u32_e{{32\|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}`
			`; GCN: v_addc_u32_e{{32\|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]`
			`; GCN-NOT: v_cndmask`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}add1:`
			`; GFX9: v_addc_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
[AMDGPU] simplify add x, *ext (setcc) => addc\|subb x, 0, setcc This simplification allows to avoid generating v_cndmask_b32 to serialize condition code between compare and use. Differential Revision: https://reviews.llvm.org/D34300 llvm-svn: 305962 2017-06-22 06:05:06 +08:00			`define amdgpu_kernel void @add1(i32 addrspace(1)* nocapture %arg) {`
			`bb:`
			`%x = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%y = tail call i32 @llvm.amdgcn.workitem.id.y()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = icmp ugt i32 %x, %y`
			`%ext = zext i1 %cmp to i32`
			`%add = add i32 %v, %ext`
			`store i32 %add, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GCN-LABEL: {{^}}add1_i16:`
			`; GCN: v_cmp_gt_u32_e{{32\|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}`
			`; GCN: v_addc_u32_e{{32\|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]`
			`; GCN-NOT: v_cndmask`

			`; GFX9-LABEL: {{^}}add1_i16:`
			`; GFX9: v_addc_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
			`define i16 @add1_i16(i32 addrspace(1)* nocapture %arg, i16 addrspace(1)* nocapture %dst) {`
			`bb:`
			`%x = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%y = tail call i32 @llvm.amdgcn.workitem.id.y()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = icmp ugt i32 %x, %y`
			`%ext = zext i1 %cmp to i32`
			`%add = add i32 %v, %ext`
			`%trunc = trunc i32 %add to i16`
			`ret i16 %trunc`
			`}`

[AMDGPU] simplify add x, *ext (setcc) => addc\|subb x, 0, setcc This simplification allows to avoid generating v_cndmask_b32 to serialize condition code between compare and use. Differential Revision: https://reviews.llvm.org/D34300 llvm-svn: 305962 2017-06-22 06:05:06 +08:00			`; GCN-LABEL: {{^}}sub1:`
[AMDGPU] Shrinking V_SUBBREV_U32 V_SUBBREV_U32 is a commute opcode for V_SUBB_U32. However, when we try to commute V_SUBB_U32 in order to shrink it we do not then process V_SUBBREV_U32 and it stay VOP3. This is fixed. Differential Revision: https://reviews.llvm.org/D43699 llvm-svn: 326011 2018-02-24 09:32:32 +08:00			`; GCN: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}`
			`; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc`
[AMDGPU] simplify add x, *ext (setcc) => addc\|subb x, 0, setcc This simplification allows to avoid generating v_cndmask_b32 to serialize condition code between compare and use. Differential Revision: https://reviews.llvm.org/D34300 llvm-svn: 305962 2017-06-22 06:05:06 +08:00			`; GCN-NOT: v_cndmask`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}sub1:`
			`; GFX9: v_subbrev_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
[AMDGPU] simplify add x, *ext (setcc) => addc\|subb x, 0, setcc This simplification allows to avoid generating v_cndmask_b32 to serialize condition code between compare and use. Differential Revision: https://reviews.llvm.org/D34300 llvm-svn: 305962 2017-06-22 06:05:06 +08:00			`define amdgpu_kernel void @sub1(i32 addrspace(1)* nocapture %arg) {`
			`bb:`
			`%x = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%y = tail call i32 @llvm.amdgcn.workitem.id.y()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = icmp ugt i32 %x, %y`
			`%ext = sext i1 %cmp to i32`
			`%add = add i32 %v, %ext`
			`store i32 %add, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

[AMDGPU] Combine add and adde, sub and sube If one of the arguments of adde/sube is zero we can fold another add/sub into it. Differential Revision: https://reviews.llvm.org/D34374 llvm-svn: 305964 2017-06-22 06:30:01 +08:00			`; GCN-LABEL: {{^}}add_adde:`
			`; GCN: v_cmp_gt_u32_e{{32\|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}`
			`; GCN: v_addc_u32_e{{32\|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]`
			`; GCN-NOT: v_cndmask`
			`; GCN-NOT: v_add`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}add_adde:`
			`; GFX9: v_addc_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
[AMDGPU] Combine add and adde, sub and sube If one of the arguments of adde/sube is zero we can fold another add/sub into it. Differential Revision: https://reviews.llvm.org/D34374 llvm-svn: 305964 2017-06-22 06:30:01 +08:00			`define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) {`
			`bb:`
			`%x = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%y = tail call i32 @llvm.amdgcn.workitem.id.y()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = icmp ugt i32 %x, %y`
			`%ext = zext i1 %cmp to i32`
			`%adde = add i32 %v, %ext`
			`%add2 = add i32 %adde, %a`
			`store i32 %add2, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}adde_add:`
			`; GCN: v_cmp_gt_u32_e{{32\|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}`
			`; GCN: v_addc_u32_e{{32\|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]`
			`; GCN-NOT: v_cndmask`
			`; GCN-NOT: v_add`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}adde_add:`
			`; GFX9: v_addc_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
[AMDGPU] Combine add and adde, sub and sube If one of the arguments of adde/sube is zero we can fold another add/sub into it. Differential Revision: https://reviews.llvm.org/D34374 llvm-svn: 305964 2017-06-22 06:30:01 +08:00			`define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) {`
			`bb:`
			`%x = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%y = tail call i32 @llvm.amdgcn.workitem.id.y()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = icmp ugt i32 %x, %y`
			`%ext = zext i1 %cmp to i32`
			`%add = add i32 %v, %a`
			`%adde = add i32 %add, %ext`
			`store i32 %adde, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}sub_sube:`
			`; GCN: v_cmp_gt_u32_e{{32\|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}`
			`; GCN: v_subb_u32_e{{32\|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]`
			`; GCN-NOT: v_cndmask`
			`; GCN-NOT: v_sub`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}sub_sube:`
			`; GFX9: v_subb_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
[AMDGPU] Combine add and adde, sub and sube If one of the arguments of adde/sube is zero we can fold another add/sub into it. Differential Revision: https://reviews.llvm.org/D34374 llvm-svn: 305964 2017-06-22 06:30:01 +08:00			`define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) {`
			`bb:`
			`%x = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%y = tail call i32 @llvm.amdgcn.workitem.id.y()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = icmp ugt i32 %x, %y`
			`%ext = sext i1 %cmp to i32`
			`%adde = add i32 %v, %ext`
			`%sub = sub i32 %adde, %a`
			`store i32 %sub, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}sube_sub:`
			`; GCN: v_cmp_gt_u32_e{{32\|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}`
			`; GCN: v_subb_u32_e{{32\|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]`
			`; GCN-NOT: v_cndmask`
			`; GCN-NOT: v_sub`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}sube_sub:`
			`; GFX9: v_subb_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
[AMDGPU] Combine add and adde, sub and sube If one of the arguments of adde/sube is zero we can fold another add/sub into it. Differential Revision: https://reviews.llvm.org/D34374 llvm-svn: 305964 2017-06-22 06:30:01 +08:00			`define amdgpu_kernel void @sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) {`
			`bb:`
			`%x = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%y = tail call i32 @llvm.amdgcn.workitem.id.y()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = icmp ugt i32 %x, %y`
			`%ext = sext i1 %cmp to i32`
			`%sub = sub i32 %v, %a`
			`%adde = add i32 %sub, %ext`
			`store i32 %adde, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

[AMDGPU] Add FP_CLASS to the add/setcc combine This is one of the nodes which also compile as v_cmp_*. Differential Revision: https://reviews.llvm.org/D34485 llvm-svn: 305970 2017-06-22 07:46:22 +08:00			`; GCN-LABEL: {{^}}zext_flclass:`
			`; GCN: v_cmp_class_f32_e{{32\|64}} [[CC:[^,]+]],`
			`; GCN: v_addc_u32_e{{32\|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]`
			`; GCN-NOT: v_cndmask`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}zext_flclass:`
			`; GFX9: v_addc_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
[AMDGPU] Add FP_CLASS to the add/setcc combine This is one of the nodes which also compile as v_cmp_*. Differential Revision: https://reviews.llvm.org/D34485 llvm-svn: 305970 2017-06-22 07:46:22 +08:00			`define amdgpu_kernel void @zext_flclass(i32 addrspace(1)* nocapture %arg, float %x) {`
			`bb:`
			`%id = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608)`
			`%ext = zext i1 %cmp to i32`
			`%add = add i32 %v, %ext`
			`store i32 %add, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

			`; GCN-LABEL: {{^}}sext_flclass:`
[AMDGPU] Shrinking V_SUBBREV_U32 V_SUBBREV_U32 is a commute opcode for V_SUBB_U32. However, when we try to commute V_SUBB_U32 in order to shrink it we do not then process V_SUBBREV_U32 and it stay VOP3. This is fixed. Differential Revision: https://reviews.llvm.org/D43699 llvm-svn: 326011 2018-02-24 09:32:32 +08:00			`; GCN: v_cmp_class_f32_e32 vcc,`
			`; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc`
[AMDGPU] Add FP_CLASS to the add/setcc combine This is one of the nodes which also compile as v_cmp_*. Differential Revision: https://reviews.llvm.org/D34485 llvm-svn: 305970 2017-06-22 07:46:22 +08:00			`; GCN-NOT: v_cndmask`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}sext_flclass:`
			`; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc`
[AMDGPU] Add FP_CLASS to the add/setcc combine This is one of the nodes which also compile as v_cmp_*. Differential Revision: https://reviews.llvm.org/D34485 llvm-svn: 305970 2017-06-22 07:46:22 +08:00			`define amdgpu_kernel void @sext_flclass(i32 addrspace(1)* nocapture %arg, float %x) {`
			`bb:`
			`%id = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608)`
			`%ext = sext i1 %cmp to i32`
			`%add = add i32 %v, %ext`
			`store i32 %add, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

[AMDGPU] Combine and x, (sext cc from i1) => select cc, x, 0 Also factored out function to check if a boolean is an already deserialized value which does not require v_cndmask_b32 to be loaded. Added binary logical operators to its check. Differential Revision: https://reviews.llvm.org/D34500 llvm-svn: 306439 2017-06-28 02:25:26 +08:00			`; GCN-LABEL: {{^}}add_and:`
			`; GCN: s_and_b64 [[CC:[^,]+]],`
			`; GCN: v_addc_u32_e{{32\|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]`
			`; GCN-NOT: v_cndmask`

[AMDGPU] A trivial fix for a buildbot failure caused by "commit 224a839fcbbead221f872cd32a1dd0c308d37299". Author: FarhanaAleen llvm-svn: 331383 2018-05-03 02:16:39 +08:00			`; GFX9-LABEL: {{^}}add_and:`
			`; GFX9: v_addc_co_u32_e{{32\|64}} v{{[0-9]+}}, vcc`
[AMDGPU] Combine and x, (sext cc from i1) => select cc, x, 0 Also factored out function to check if a boolean is an already deserialized value which does not require v_cndmask_b32 to be loaded. Added binary logical operators to its check. Differential Revision: https://reviews.llvm.org/D34500 llvm-svn: 306439 2017-06-28 02:25:26 +08:00			`define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) {`
			`bb:`
			`%x = tail call i32 @llvm.amdgcn.workitem.id.x()`
			`%y = tail call i32 @llvm.amdgcn.workitem.id.y()`
			`%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x`
			`%v = load i32, i32 addrspace(1)* %gep, align 4`
			`%cmp1 = icmp ugt i32 %x, %y`
			`%cmp2 = icmp ugt i32 %x, 1`
			`%cmp = and i1 %cmp1, %cmp2`
			`%ext = zext i1 %cmp to i32`
			`%add = add i32 %v, %ext`
			`store i32 %add, i32 addrspace(1)* %gep, align 4`
			`ret void`
			`}`

[AMDGPU] Add FP_CLASS to the add/setcc combine This is one of the nodes which also compile as v_cmp_*. Differential Revision: https://reviews.llvm.org/D34485 llvm-svn: 305970 2017-06-22 07:46:22 +08:00			`declare i1 @llvm.amdgcn.class.f32(float, i32) #0`

[AMDGPU] simplify add x, *ext (setcc) => addc\|subb x, 0, setcc This simplification allows to avoid generating v_cndmask_b32 to serialize condition code between compare and use. Differential Revision: https://reviews.llvm.org/D34300 llvm-svn: 305962 2017-06-22 06:05:06 +08:00			`declare i32 @llvm.amdgcn.workitem.id.x() #0`

			`declare i32 @llvm.amdgcn.workitem.id.y() #0`

			`attributes #0 = { nounwind readnone speculatable }`