# llvm-project/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir

# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s

# If there's a base offset, check that SILoadStoreOptimizer creates
# V_ADD_{I|U}32_e64 for that offset; _e64 uses a vreg for the carry (rather than
# $vcc, which is used in _e32); this ensures that $vcc is not inadvertently
# clobbered.

# GCN-LABEL: name: ds_combine_base_offset{{$}}

# VI: V_ADD_I32_e64 %6, %0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
# VI: V_ADD_I32_e64 %10, %3,
# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,

# GFX9: V_ADD_U32_e64 %6, %0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,

--- |
  @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4

  ; IR stub paired with the machine function of the same name further down.
  ; Its control flow is trivial (bb.0 -> bb2 -> bb1 -> ret void); the values
  ; %tmp, %tmp1, %tmp2 and %tmp3 are what that machine function's memory
  ; operands name through their %ir.tmp* references.
  define amdgpu_kernel void @ds_combine_base_offset() {
    bb.0:
      br label %bb2

    bb1:
      ret void

    bb2:
      ; Pointers to float elements 0, 8, 16 and 24 of the addrspace(3) array @0.
      %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
      %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
      %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
      %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
      br label %bb1
    }

  ; IR stub paired with the machine function of the same name further down.
  ; The body is the same as in @ds_combine_base_offset: trivial control flow
  ; plus the four pointers that the machine function's memory operands name
  ; through their %ir.tmp* references.
  define amdgpu_kernel void @ds_combine_base_offset_subreg() {
    bb.0:
      br label %bb2

    bb1:
      ret void

    bb2:
      ; Pointers to float elements 0, 8, 16 and 24 of the addrspace(3) array @0.
      %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
      %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
      %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
      %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
      br label %bb1
    }

  ; IR stub paired with the machine function of the same name further down.
  ; The body is the same as in @ds_combine_base_offset: trivial control flow
  ; plus the four pointers that the machine function's memory operands name
  ; through their %ir.tmp* references.
  define amdgpu_kernel void @ds_combine_subreg() {
    bb.0:
      br label %bb2

    bb1:
      ret void

    bb2:
      ; Pointers to float elements 0, 8, 16 and 24 of the addrspace(3) array @0.
      %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
      %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
      %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
      %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
      br label %bb1
    }
---
# Base-offset case on plain 32-bit vregs.
#
# In bb.2, V_CMP_NE_U32_e32 defines $vcc before the DS instructions, and
# S_AND_B64 / S_CBRANCH_VCCNZ read it after them, so $vcc is live across the
# whole run of loads and stores. The two DS_WRITE_B32 share address register %0
# (byte offsets 1024 and 1056) and the two DS_READ_B32 share address register
# %3 (byte offsets 1088 and 1120). The check lines above expect each pair to be
# merged into a single two-element DS instruction with offsets 0 and 8,
# addressed through the result of an inserted _e64 add.
name:            ds_combine_base_offset
body:             |
  bb.0:
    %0:vgpr_32 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...

# GCN-LABEL: name: ds_combine_base_offset_subreg{{$}}

# VI: V_ADD_I32_e64 %6, %0.sub0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8,
# VI: V_ADD_I32_e64 %10, %3.sub0,
# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,

# GFX9: V_ADD_U32_e64 %6, %0.sub0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0.sub0, %3.sub0, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3.sub0,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,
---
# Base-offset case where the address and data operands are sub-registers.
#
# Same instruction sequence and byte offsets (1024/1056 for the stores,
# 1088/1120 for the loads) as ds_combine_base_offset, but %0 and %3 are vreg_64
# and every use goes through .sub0; %3 is only partially defined
# (undef %3.sub0). The check lines above expect the inserted add and the merged
# DS instructions to keep the .sub0 sub-register index on those operands.
name:            ds_combine_base_offset_subreg
body:             |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    %4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...

# GCN-LABEL: name: ds_combine_subreg{{$}}

# VI: DS_WRITE2_B32 %0.sub0, %0.sub0, %3.sub0, 0, 8,
# VI: DS_READ2_B32 %3.sub0, 0, 8,

# GFX9: DS_WRITE2_B32_gfx9 %0.sub0, %0.sub0, %3.sub0, 0, 8,
# GFX9: DS_READ2_B32_gfx9 %3.sub0, 0, 8,
---
# Sub-register case without a large base offset.
#
# Same shape as ds_combine_base_offset_subreg, but both the store pair and the
# load pair use byte offsets 0 and 32. The check lines above expect the merged
# DS instructions to be addressed directly through %0.sub0 and %3.sub0 (no
# "killed" temporary), again with offsets 0 and 8.
name:            ds_combine_subreg
body:             |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    %4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
    S_BRANCH %bb.1
...
[AMDGPU] SI Load Store Optimizer: When merging with offset, use V_ADD_{I\|U}32_e64 - Change inserted add ( V_ADD_{I\|U}32_e32 ) to _e64 version ( V_ADD_{I\|U}32_e64 ) so that the add uses a vreg for the carry; this prevents inserted v_add from killing VCC; the _e64 version doesn't accept a literal in its encoding, so we need to introduce a mov instr as well to get the imm into a register. - Change pass name to "SI Load Store Optimizer"; this removes the '/', which complicates scripts. Differential Revision: https://reviews.llvm.org/D42124 llvm-svn: 323153 2018-01-23 05:46:43 +08:00			`# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s \| FileCheck -check-prefixes=GCN,VI %s`
			`# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s \| FileCheck -check-prefixes=GCN,GFX9 %s`

			`# If there's a base offset, check that SILoadStoreOptimizer creates`
			`# V_ADD_{I\|U}32_e64 for that offset; _e64 uses a vreg for the carry (rather than`
Followup on Proposal to move MIR physical register namespace to '$' sigil. Discussed here: http://lists.llvm.org/pipermail/llvm-dev/2018-January/120320.html In preparation for adding support for named vregs we are changing the sigil for physical registers in MIR to '$' from '%'. This will prevent name clashes of named physical register with named vregs. llvm-svn: 323922 2018-02-01 06:04:26 +08:00			`# $vcc, which is used in _e32); this ensures that $vcc is not inadvertently`
[AMDGPU] SI Load Store Optimizer: When merging with offset, use V_ADD_{I\|U}32_e64 - Change inserted add ( V_ADD_{I\|U}32_e32 ) to _e64 version ( V_ADD_{I\|U}32_e64 ) so that the add uses a vreg for the carry; this prevents inserted v_add from killing VCC; the _e64 version doesn't accept a literal in its encoding, so we need to introduce a mov instr as well to get the imm into a register. - Change pass name to "SI Load Store Optimizer"; this removes the '/', which complicates scripts. Differential Revision: https://reviews.llvm.org/D42124 llvm-svn: 323153 2018-01-23 05:46:43 +08:00			`# clobbered.`

[AMDGPU] Fix ds combine with subregs Differential Revision: https://reviews.llvm.org/D52522 llvm-svn: 343047 2018-09-26 07:33:18 +08:00			`# GCN-LABEL: name: ds_combine_base_offset{{$}}`
[AMDGPU] SI Load Store Optimizer: When merging with offset, use V_ADD_{I\|U}32_e64 - Change inserted add ( V_ADD_{I\|U}32_e32 ) to _e64 version ( V_ADD_{I\|U}32_e64 ) so that the add uses a vreg for the carry; this prevents inserted v_add from killing VCC; the _e64 version doesn't accept a literal in its encoding, so we need to introduce a mov instr as well to get the imm into a register. - Change pass name to "SI Load Store Optimizer"; this removes the '/', which complicates scripts. Differential Revision: https://reviews.llvm.org/D42124 llvm-svn: 323153 2018-01-23 05:46:43 +08:00
			`# VI: V_ADD_I32_e64 %6, %0,`
			`# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,`
			`# VI: V_ADD_I32_e64 %10, %3,`
			`# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,`

			`# GFX9: V_ADD_U32_e64 %6, %0,`
			`# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,`
			`# GFX9: V_ADD_U32_e64 %9, %3,`
			`# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,`

			`--- \|`
			`@0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4`

[AMDGPU] Fix ds combine with subregs Differential Revision: https://reviews.llvm.org/D52522 llvm-svn: 343047 2018-09-26 07:33:18 +08:00			`define amdgpu_kernel void @ds_combine_base_offset() {`
			`bb.0:`
			`br label %bb2`

			`bb1:`
			`ret void`

			`bb2:`
			`%tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0`
			`%tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8`
			`%tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16`
			`%tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24`
			`br label %bb1`
			`}`

			`define amdgpu_kernel void @ds_combine_base_offset_subreg() {`
			`bb.0:`
			`br label %bb2`

			`bb1:`
			`ret void`

			`bb2:`
			`%tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0`
			`%tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8`
			`%tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16`
			`%tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24`
			`br label %bb1`
			`}`

			`define amdgpu_kernel void @ds_combine_subreg() {`
[AMDGPU] SI Load Store Optimizer: When merging with offset, use V_ADD_{I\|U}32_e64 - Change inserted add ( V_ADD_{I\|U}32_e32 ) to _e64 version ( V_ADD_{I\|U}32_e64 ) so that the add uses a vreg for the carry; this prevents inserted v_add from killing VCC; the _e64 version doesn't accept a literal in its encoding, so we need to introduce a mov instr as well to get the imm into a register. - Change pass name to "SI Load Store Optimizer"; this removes the '/', which complicates scripts. Differential Revision: https://reviews.llvm.org/D42124 llvm-svn: 323153 2018-01-23 05:46:43 +08:00			`bb.0:`
			`br label %bb2`

			`bb1:`
			`ret void`

			`bb2:`
			`%tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0`
			`%tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8`
			`%tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16`
			`%tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24`
			`br label %bb1`
			`}`
			`---`
[AMDGPU] Fix ds combine with subregs Differential Revision: https://reviews.llvm.org/D52522 llvm-svn: 343047 2018-09-26 07:33:18 +08:00			`name: ds_combine_base_offset`
[AMDGPU] SI Load Store Optimizer: When merging with offset, use V_ADD_{I\|U}32_e64 - Change inserted add ( V_ADD_{I\|U}32_e32 ) to _e64 version ( V_ADD_{I\|U}32_e64 ) so that the add uses a vreg for the carry; this prevents inserted v_add from killing VCC; the _e64 version doesn't accept a literal in its encoding, so we need to introduce a mov instr as well to get the imm into a register. - Change pass name to "SI Load Store Optimizer"; this removes the '/', which complicates scripts. Differential Revision: https://reviews.llvm.org/D42124 llvm-svn: 323153 2018-01-23 05:46:43 +08:00			`body: \|`
			`bb.0:`
			`%0:vgpr_32 = IMPLICIT_DEF`
			`S_BRANCH %bb.2`

			`bb.1:`
[AMDGPU] Add support for immediate operand for S_ENDPGM Summary: Add support for immediate operand in S_ENDPGM Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6 Reviewers: alexshap Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59213 llvm-svn: 355902 2019-03-12 17:52:58 +08:00			`S_ENDPGM 0`
[AMDGPU] SI Load Store Optimizer: When merging with offset, use V_ADD_{I\|U}32_e64 - Change inserted add ( V_ADD_{I\|U}32_e32 ) to _e64 version ( V_ADD_{I\|U}32_e64 ) so that the add uses a vreg for the carry; this prevents inserted v_add from killing VCC; the _e64 version doesn't accept a literal in its encoding, so we need to introduce a mov instr as well to get the imm into a register. - Change pass name to "SI Load Store Optimizer"; this removes the '/', which complicates scripts. Differential Revision: https://reviews.llvm.org/D42124 llvm-svn: 323153 2018-01-23 05:46:43 +08:00
			`bb.2:`
Followup on Proposal to move MIR physical register namespace to '$' sigil. Discussed here: http://lists.llvm.org/pipermail/llvm-dev/2018-January/120320.html In preparation for adding support for named vregs we are changing the sigil for physical registers in MIR to '$' from '%'. This will prevent name clashes of named physical register with named vregs. llvm-svn: 323922 2018-02-01 06:04:26 +08:00			`%1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec`
[AMDGPU] Asm/disasm v_cndmask_b32_e64 with abs/neg source modifiers This commit allows v_cndmask_b32_e64 with abs, neg source modifiers on src0, src1 to be assembled and disassembled. This does appear to be allowed, even though they are floating point modifiers and the operand type is b32. To do this, I added src0_modifiers and src1_modifiers to the MachineInstr, which involved fixing up several places in codegen and mir tests. Differential Revision: https://reviews.llvm.org/D59191 Change-Id: I69bf4a8c73ebc65744f6110bb8fc4e937d79fbea llvm-svn: 356398 2019-03-19 03:25:39 +08:00			`%2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec`
Followup on Proposal to move MIR physical register namespace to '$' sigil. Discussed here: http://lists.llvm.org/pipermail/llvm-dev/2018-January/120320.html In preparation for adding support for named vregs we are changing the sigil for physical registers in MIR to '$' from '%'. This will prevent name clashes of named physical register with named vregs. llvm-svn: 323922 2018-02-01 06:04:26 +08:00			`V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec`
			`DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)`
			`%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec`
			`DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)`
			`%4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)`
			`%5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)`
			`$vcc = S_AND_B64 $exec, $vcc, implicit-def $scc`
			`S_CBRANCH_VCCNZ %bb.1, implicit $vcc`
[AMDGPU] SI Load Store Optimizer: When merging with offset, use V_ADD_{I\|U}32_e64 - Change inserted add ( V_ADD_{I\|U}32_e32 ) to _e64 version ( V_ADD_{I\|U}32_e64 ) so that the add uses a vreg for the carry; this prevents inserted v_add from killing VCC; the _e64 version doesn't accept a literal in its encoding, so we need to introduce a mov instr as well to get the imm into a register. - Change pass name to "SI Load Store Optimizer"; this removes the '/', which complicates scripts. Differential Revision: https://reviews.llvm.org/D42124 llvm-svn: 323153 2018-01-23 05:46:43 +08:00			`S_BRANCH %bb.1`
			`...`
[AMDGPU] Fix ds combine with subregs Differential Revision: https://reviews.llvm.org/D52522 llvm-svn: 343047 2018-09-26 07:33:18 +08:00
			`# GCN-LABEL: name: ds_combine_base_offset_subreg{{$}}`

			`# VI: V_ADD_I32_e64 %6, %0.sub0,`
			`# VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8,`
			`# VI: V_ADD_I32_e64 %10, %3.sub0,`
			`# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,`

			`# GFX9: V_ADD_U32_e64 %6, %0.sub0,`
			`# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0.sub0, %3.sub0, 0, 8,`
			`# GFX9: V_ADD_U32_e64 %9, %3.sub0,`
			`# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,`
			`---`
			`name: ds_combine_base_offset_subreg`
			`body: \|`
			`bb.0:`
			`%0:vreg_64 = IMPLICIT_DEF`
			`S_BRANCH %bb.2`

			`bb.1:`
[AMDGPU] Add support for immediate operand for S_ENDPGM Summary: Add support for immediate operand in S_ENDPGM Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6 Reviewers: alexshap Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59213 llvm-svn: 355902 2019-03-12 17:52:58 +08:00			`S_ENDPGM 0`
[AMDGPU] Fix ds combine with subregs Differential Revision: https://reviews.llvm.org/D52522 llvm-svn: 343047 2018-09-26 07:33:18 +08:00
			`bb.2:`
			`%1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec`
[AMDGPU] Asm/disasm v_cndmask_b32_e64 with abs/neg source modifiers This commit allows v_cndmask_b32_e64 with abs, neg source modifiers on src0, src1 to be assembled and disassembled. This does appear to be allowed, even though they are floating point modifiers and the operand type is b32. To do this, I added src0_modifiers and src1_modifiers to the MachineInstr, which involved fixing up several places in codegen and mir tests. Differential Revision: https://reviews.llvm.org/D59191 Change-Id: I69bf4a8c73ebc65744f6110bb8fc4e937d79fbea llvm-svn: 356398 2019-03-19 03:25:39 +08:00			`%2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec`
[AMDGPU] Fix ds combine with subregs Differential Revision: https://reviews.llvm.org/D52522 llvm-svn: 343047 2018-09-26 07:33:18 +08:00			`V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec`
			`DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)`
			`undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec`
			`DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)`
			`%4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)`
			`%5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)`
			`$vcc = S_AND_B64 $exec, $vcc, implicit-def $scc`
			`S_CBRANCH_VCCNZ %bb.1, implicit $vcc`
			`S_BRANCH %bb.1`
			`...`

			`# GCN-LABEL: name: ds_combine_subreg{{$}}`

			`# VI: DS_WRITE2_B32 %0.sub0, %0.sub0, %3.sub0, 0, 8,`
			`# VI: DS_READ2_B32 %3.sub0, 0, 8,`

			`# GFX9: DS_WRITE2_B32_gfx9 %0.sub0, %0.sub0, %3.sub0, 0, 8,`
			`# GFX9: DS_READ2_B32_gfx9 %3.sub0, 0, 8,`
			`---`
			`name: ds_combine_subreg`
			`body: \|`
			`bb.0:`
			`%0:vreg_64 = IMPLICIT_DEF`
			`S_BRANCH %bb.2`

			`bb.1:`
[AMDGPU] Add support for immediate operand for S_ENDPGM Summary: Add support for immediate operand in S_ENDPGM Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6 Reviewers: alexshap Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59213 llvm-svn: 355902 2019-03-12 17:52:58 +08:00			`S_ENDPGM 0`
[AMDGPU] Fix ds combine with subregs Differential Revision: https://reviews.llvm.org/D52522 llvm-svn: 343047 2018-09-26 07:33:18 +08:00
			`bb.2:`
			`%1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec`
[AMDGPU] Asm/disasm v_cndmask_b32_e64 with abs/neg source modifiers This commit allows v_cndmask_b32_e64 with abs, neg source modifiers on src0, src1 to be assembled and disassembled. This does appear to be allowed, even though they are floating point modifiers and the operand type is b32. To do this, I added src0_modifiers and src1_modifiers to the MachineInstr, which involved fixing up several places in codegen and mir tests. Differential Revision: https://reviews.llvm.org/D59191 Change-Id: I69bf4a8c73ebc65744f6110bb8fc4e937d79fbea llvm-svn: 356398 2019-03-19 03:25:39 +08:00			`%2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec`
[AMDGPU] Fix ds combine with subregs Differential Revision: https://reviews.llvm.org/D52522 llvm-svn: 343047 2018-09-26 07:33:18 +08:00			`V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec`
			`DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)`
			`undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec`
			`DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)`
			`%4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)`
			`%5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)`
			`$vcc = S_AND_B64 $exec, $vcc, implicit-def $scc`
			`S_CBRANCH_VCCNZ %bb.1, implicit $vcc`
			`S_BRANCH %bb.1`
			`...`