2018-06-01 04:13:51 +08:00
# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=phi-node-elimination,si-form-memory-clauses %s -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: {{^}}name: vector_clause{{$}}
# GCN: early-clobber %2:vreg_128, early-clobber %4:vreg_128, early-clobber %1:vreg_128, early-clobber %3:vreg_128 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
2019-05-01 06:08:23 +08:00
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
---
name: vector_clause
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: subreg_full{{$}}
# GCN: early-clobber %1:vreg_128 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: internal %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
2019-05-01 06:08:23 +08:00
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
---
name: subreg_full
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
%1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
%1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
%1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: subreg_part{{$}}
# GCN: undef early-clobber %1.sub0_sub1:vreg_128, undef early-clobber %1.sub3:vreg_128 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
2019-05-01 06:08:23 +08:00
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
---
name: subreg_part
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec
%1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec
%1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: dead{{$}}
# GCN: dead early-clobber %2:vreg_128, dead early-clobber %4:vreg_128, dead early-clobber %1:vreg_128, dead early-clobber %3:vreg_128 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
---
name: dead
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: subreg_dead{{$}}
# GCN: early-clobber %1:vreg_64 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
2019-05-01 06:08:23 +08:00
# GCN-NEXT: GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
---
name: subreg_dead
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
undef %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: kill{{$}}
# GCN: early-clobber %2:vreg_128, early-clobber %3:vreg_128 = BUNDLE %0, %1, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
---
name: kill
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: indirect{{$}}
2019-05-01 06:08:23 +08:00
# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: early-clobber %2:vreg_128, early-clobber %3:vreg_128 = BUNDLE %1, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
---
name: indirect
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
%1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: stack{{$}}
# GCN: %0:vreg_64 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
---
name: stack
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
stack:
- { id: 0, type: default, offset: 0, size: 64, alignment: 8 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: overflow_counter{{$}}
# GCN: dead early-clobber %7:vgpr_32, dead early-clobber %14:vgpr_32, dead early-clobber %2:vgpr_32, dead early-clobber %9:vgpr_32, dead early-clobber %4:vgpr_32, dead early-clobber %11:vgpr_32, dead early-clobber %6:vgpr_32, dead early-clobber %13:vgpr_32, dead early-clobber %1:vgpr_32, dead early-clobber %8:vgpr_32, dead early-clobber %15:vgpr_32, dead early-clobber %3:vgpr_32, dead early-clobber %10:vgpr_32, dead early-clobber %5:vgpr_32, dead early-clobber %12:vgpr_32 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: dead %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
# GCN-NEXT: dead early-clobber %16:vgpr_32, dead early-clobber %17:vgpr_32 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
---
name: overflow_counter
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: vgpr_32 }
- { id: 5, class: vgpr_32 }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
- { id: 8, class: vgpr_32 }
- { id: 9, class: vgpr_32 }
- { id: 10, class: vgpr_32 }
- { id: 11, class: vgpr_32 }
- { id: 12, class: vgpr_32 }
- { id: 13, class: vgpr_32 }
- { id: 14, class: vgpr_32 }
- { id: 15, class: vgpr_32 }
- { id: 16, class: vgpr_32 }
- { id: 17, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
%1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
%2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec
%3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec
%4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, implicit $exec
%5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec
%6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, implicit $exec
%7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, implicit $exec
%8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, implicit $exec
%9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec
%10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, implicit $exec
%11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, implicit $exec
%12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, implicit $exec
%13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, implicit $exec
%14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, implicit $exec
%15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, implicit $exec
%16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, 0, implicit $exec
%17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: reg_pressure{{$}}
# GCN: dead early-clobber %2:vreg_128, dead early-clobber %4:vreg_128, dead early-clobber %1:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vreg_128 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
# GCN-NEXT: dead early-clobber %7:vreg_128, dead early-clobber %6:vreg_128 = BUNDLE %0, implicit $exec {
2019-05-01 06:08:23 +08:00
# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
---
name: reg_pressure
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
- { id: 5, class: vreg_128 }
- { id: 6, class: vreg_128 }
- { id: 7, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
2019-05-01 06:08:23 +08:00
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec
%5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, implicit $exec
%6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, 0, implicit $exec
%7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: image_clause{{$}}
2019-10-10 15:11:33 +08:00
# GCN: early-clobber %4:vreg_128, early-clobber %3:vreg_128, early-clobber %5:vreg_128 = BUNDLE %0, undef %2:sgpr_128, %1, implicit $exec {
# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
AMDGPU: Turn D16 for MIMG instructions into a regular operand
Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.
We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instructions loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.
We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.
The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which choose the correct variant for
other MIMG instruction is extended to cover gather4 as well.
As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).
While we're at it, delete a whole bunch of dead legacy TableGen code.
Change-Id: I89b02c2841c06f95e662541433e597f5d4553978
Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor
Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47434
llvm-svn: 335222
2018-06-21 21:36:01 +08:00
# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
---
name: image_clause
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: sreg_256 }
2019-10-10 15:11:33 +08:00
- { id: 2, class: sgpr_128 }
2018-06-01 04:13:51 +08:00
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
- { id: 5, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
2019-10-10 15:11:33 +08:00
%3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
AMDGPU: Turn D16 for MIMG instructions into a regular operand
Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.
We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instructions loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.
We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.
The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which choose the correct variant for
other MIMG instruction is extended to cover gather4 as well.
As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).
While we're at it, delete a whole bunch of dead legacy TableGen code.
Change-Id: I89b02c2841c06f95e662541433e597f5d4553978
Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor
Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47434
llvm-svn: 335222
2018-06-21 21:36:01 +08:00
IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: mixed_clause{{$}}
# GCN: dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
AMDGPU: Turn D16 for MIMG instructions into a regular operand
Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.
We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instructions loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.
We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.
The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which choose the correct variant for
other MIMG instruction is extended to cover gather4 as well.
As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).
While we're at it, delete a whole bunch of dead legacy TableGen code.
Change-Id: I89b02c2841c06f95e662541433e597f5d4553978
Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor
Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47434
llvm-svn: 335222
2018-06-21 21:36:01 +08:00
# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
2019-05-01 06:08:23 +08:00
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
[AMDGPU] Extend buffer intrinsics with swizzling
Summary:
Extend cachepolicy operand in the new VMEM buffer intrinsics
to supply information whether the buffer data is swizzled.
Also, propagate this information to MIR.
Intrinsics updated:
int_amdgcn_raw_buffer_load
int_amdgcn_raw_buffer_load_format
int_amdgcn_raw_buffer_store
int_amdgcn_raw_buffer_store_format
int_amdgcn_raw_tbuffer_load
int_amdgcn_raw_tbuffer_store
int_amdgcn_struct_buffer_load
int_amdgcn_struct_buffer_load_format
int_amdgcn_struct_buffer_store
int_amdgcn_struct_buffer_store_format
int_amdgcn_struct_tbuffer_load
int_amdgcn_struct_tbuffer_store
Furthermore, disable merging of VMEM buffer instructions
in SI Load/Store optimizer, if the "swizzled" bit on the instruction
is on.
The default value of the bit is 0, meaning that data in buffer
is linear and buffer instructions can be merged.
There is no difference in the generated code with this commit.
However, in the future it will be expected that front-ends
use buffer intrinsics with correct "swizzled" bit set.
Reviewers: arsenm, nhaehnle, tpr
Reviewed By: nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, arphaman, jfb, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68200
llvm-svn: 373491
2019-10-03 01:22:36 +08:00
# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
# GCN-NEXT: }
---
name: mixed_clause
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: sreg_256 }
2019-10-10 15:11:33 +08:00
- { id: 2, class: sgpr_128 }
2018-06-01 04:13:51 +08:00
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
- { id: 5, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
AMDGPU: Turn D16 for MIMG instructions into a regular operand
Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.
We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instructions loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.
We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.
The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which choose the correct variant for
other MIMG instruction is extended to cover gather4 as well.
As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).
While we're at it, delete a whole bunch of dead legacy TableGen code.
Change-Id: I89b02c2841c06f95e662541433e597f5d4553978
Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor
Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47434
llvm-svn: 335222
2018-06-21 21:36:01 +08:00
%3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
2019-05-01 06:08:23 +08:00
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
[AMDGPU] Extend buffer intrinsics with swizzling
Summary:
Extend cachepolicy operand in the new VMEM buffer intrinsics
to supply information whether the buffer data is swizzled.
Also, propagate this information to MIR.
Intrinsics updated:
int_amdgcn_raw_buffer_load
int_amdgcn_raw_buffer_load_format
int_amdgcn_raw_buffer_store
int_amdgcn_raw_buffer_store_format
int_amdgcn_raw_tbuffer_load
int_amdgcn_raw_tbuffer_store
int_amdgcn_struct_buffer_load
int_amdgcn_struct_buffer_load_format
int_amdgcn_struct_buffer_store
int_amdgcn_struct_buffer_store_format
int_amdgcn_struct_tbuffer_load
int_amdgcn_struct_tbuffer_store
Furthermore, disable merging of VMEM buffer instructions
in SI Load/Store optimizer, if the "swizzled" bit on the instruction
is on.
The default value of the bit is 0, meaning that data in buffer
is linear and buffer instructions can be merged.
There is no difference in the generated code with this commit.
However, in the future it will be expected that front-ends
use buffer intrinsics with correct "swizzled" bit set.
Reviewers: arsenm, nhaehnle, tpr
Reviewed By: nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, arphaman, jfb, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68200
llvm-svn: 373491
2019-10-03 01:22:36 +08:00
%5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
2018-06-01 04:13:51 +08:00
...
# GCN-LABEL: {{^}}name: atomic{{$}}
# GCN: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: dead %2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: dead %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
# GCN-NEXT: S_ENDPGM 0
2018-06-01 04:13:51 +08:00
---
name: atomic
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 0, implicit $exec, implicit $flat_scr
%3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 0, implicit $exec, implicit $flat_scr
FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
S_ENDPGM 0
2018-06-01 04:13:51 +08:00
...