2016-10-28 04:39:09 +08:00
|
|
|
# Each llc invocation below runs only the post-RA-hazard-rec pass for one
# -mcpu value and gives FileCheck a cumulative prefix list: tahiti enables GCN,
# hawaii adds CIVI, fiji adds VI, and gfx900 adds GFX9. A check written with a
# later prefix is therefore ignored on the earlier targets.
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
|
2016-10-28 07:05:31 +08:00
|
|
|
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
|
|
|
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
|
2017-02-19 02:29:53 +08:00
|
|
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
|
2016-10-08 07:42:48 +08:00
|
|
|
|
2016-10-15 08:58:14 +08:00
|
|
|
--- |
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @div_fmas() { ret void }
|
|
|
|
define amdgpu_kernel void @s_getreg() { ret void }
|
|
|
|
define amdgpu_kernel void @s_setreg() { ret void }
|
|
|
|
define amdgpu_kernel void @vmem_gt_8dw_store() { ret void }
|
|
|
|
define amdgpu_kernel void @readwrite_lane() { ret void }
|
|
|
|
define amdgpu_kernel void @rfe() { ret void }
|
|
|
|
define amdgpu_kernel void @s_mov_fed_b32() { ret void }
|
|
|
|
define amdgpu_kernel void @s_movrel() { ret void }
|
|
|
|
define amdgpu_kernel void @v_interp() { ret void }
|
2017-08-04 09:09:43 +08:00
|
|
|
define amdgpu_kernel void @dpp() { ret void }
|
2017-02-19 02:29:53 +08:00
|
|
|
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
|
2017-02-19 02:29:53 +08:00
|
|
|
entry:
|
|
|
|
%A.addr = alloca i32 addrspace(1)*, align 4
|
|
|
|
store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
|
|
|
|
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !5, metadata !11), !dbg !12
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
|
|
|
|
|
|
|
!llvm.dbg.cu = !{!0}
|
|
|
|
!llvm.module.flags = !{!3, !4}
|
|
|
|
|
|
|
|
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
|
|
|
!1 = !DIFile(filename: "test01.cl", directory: "/dev/null")
|
|
|
|
!2 = !{}
|
|
|
|
!3 = !{i32 2, !"Dwarf Version", i32 2}
|
|
|
|
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
|
|
|
!5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9)
|
|
|
|
!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
|
|
|
|
!7 = !DISubroutineType(types: !8)
|
|
|
|
!8 = !{null, !9}
|
|
|
|
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32)
|
|
|
|
!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
|
|
|
|
!11 = !DIExpression()
|
|
|
|
!12 = !DILocation(line: 1, column: 30, scope: !6)
|
|
|
|
|
2016-10-15 08:58:14 +08:00
|
|
|
...
|
|
|
|
---
|
2016-10-28 04:39:09 +08:00
|
|
|
# GCN-LABEL: name: div_fmas
|
|
|
|
|
|
|
|
# div_fmas bb.0: vcc is set by S_MOV_B64 and then read by V_DIV_FMAS_F32.
# No S_NOP may appear between the two on any target.
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: S_MOV_B64
|
|
|
|
# GCN-NOT: S_NOP
|
|
|
|
# GCN: V_DIV_FMAS
|
|
|
|
|
|
|
|
# div_fmas bb.1: vcc is an implicit def of V_CMP_EQ_I32_e32 in the input.
# Four S_NOP matches are required before the V_DIV_FMAS_F32 that reads vcc.
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: V_CMP_EQ_I32
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: V_DIV_FMAS_F32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.2:
|
|
|
|
# GCN: V_CMP_EQ_I32
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: V_DIV_FMAS_F32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.3:
|
|
|
|
# GCN: V_DIV_SCALE_F32
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: V_DIV_FMAS_F32
|
2016-10-15 08:58:14 +08:00
|
|
|
name: div_fmas
|
2016-10-08 07:42:48 +08:00
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
%vcc = S_MOV_B64 0
|
|
|
|
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec
|
|
|
|
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
|
|
|
|
S_BRANCH %bb.2
|
|
|
|
|
|
|
|
bb.2:
|
|
|
|
%vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec
|
|
|
|
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
|
|
|
|
S_BRANCH %bb.3
|
|
|
|
|
|
|
|
bb.3:
|
2017-01-19 14:04:12 +08:00
|
|
|
%vgpr4, %vcc = V_DIV_SCALE_F32 %vgpr1, %vgpr1, %vgpr3, implicit %exec
|
2016-10-08 07:42:48 +08:00
|
|
|
%vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
|
|
|
|
S_ENDPGM
|
2016-10-15 08:58:14 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
|
|
|
|
...
|
|
|
|
---
|
2016-10-28 04:39:09 +08:00
|
|
|
# GCN-LABEL: name: s_getreg
|
2016-10-15 08:58:14 +08:00
|
|
|
|
2016-10-28 04:39:09 +08:00
|
|
|
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: S_SETREG
|
|
|
|
# GCN: S_NOP 0
|
|
|
|
# GCN: S_NOP 0
|
|
|
|
# GCN: S_GETREG
|
2016-10-15 08:58:14 +08:00
|
|
|
|
2016-10-28 04:39:09 +08:00
|
|
|
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: S_SETREG_IMM32
|
|
|
|
# GCN: S_NOP 0
|
|
|
|
# GCN: S_NOP 0
|
|
|
|
# GCN: S_GETREG
|
2016-10-15 08:58:14 +08:00
|
|
|
|
2016-10-28 04:39:09 +08:00
|
|
|
# s_getreg bb.2: the input places an S_MOV_B32 between the S_SETREG_B32 and
# the S_GETREG_B32, which both use immediate 1. Only one S_NOP 0 is checked
# here, versus two in bb.0 and bb.1 where the pair is adjacent.
# GCN-LABEL: bb.2:
|
|
|
|
# GCN: S_SETREG
|
|
|
|
# GCN: S_NOP 0
|
|
|
|
# GCN: S_GETREG
|
2016-10-15 08:58:14 +08:00
|
|
|
|
2016-10-28 04:39:09 +08:00
|
|
|
# s_getreg bb.3: S_SETREG_B32 uses immediate 0 while S_GETREG_B32 uses
# immediate 1. The next-line check requires S_GETREG directly after S_SETREG,
# i.e. no S_NOP is inserted.
# GCN-LABEL: bb.3:
|
|
|
|
# GCN: S_SETREG
|
|
|
|
# GCN-NEXT: S_GETREG
|
2016-10-15 08:58:14 +08:00
|
|
|
|
|
|
|
name: s_getreg
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
S_SETREG_B32 %sgpr0, 1
|
|
|
|
%sgpr1 = S_GETREG_B32 1
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
S_SETREG_IMM32_B32 0, 1
|
|
|
|
%sgpr1 = S_GETREG_B32 1
|
|
|
|
S_BRANCH %bb.2
|
|
|
|
|
|
|
|
bb.2:
|
|
|
|
S_SETREG_B32 %sgpr0, 1
|
|
|
|
%sgpr1 = S_MOV_B32 0
|
|
|
|
%sgpr2 = S_GETREG_B32 1
|
|
|
|
S_BRANCH %bb.3
|
|
|
|
|
|
|
|
bb.3:
|
|
|
|
S_SETREG_B32 %sgpr0, 0
|
|
|
|
%sgpr1 = S_GETREG_B32 1
|
|
|
|
S_ENDPGM
|
2016-10-08 07:42:48 +08:00
|
|
|
...
|
2016-10-28 04:39:09 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# GCN-LABEL: name: s_setreg
|
|
|
|
|
|
|
|
# s_setreg bb.0: two S_SETREG_B32 with the same immediate (1). One S_NOP 0 is
# checked on every target and a second one only where the VI prefix is enabled
# (fiji, gfx900). The second S_SETREG must sit on the line right after the
# last matched S_NOP.
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: S_SETREG
|
|
|
|
# GCN: S_NOP 0
|
|
|
|
# VI: S_NOP 0
|
|
|
|
# GCN-NEXT: S_SETREG
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: S_SETREG
|
|
|
|
# GCN: S_NOP 0
|
|
|
|
# VI: S_NOP 0
|
|
|
|
# GCN-NEXT: S_SETREG
|
|
|
|
|
|
|
|
# s_setreg bb.2: the two S_SETREG_B32 use different immediates (1, then 0).
# The second must directly follow the first on every target, with no S_NOP.
# GCN-LABEL: bb.2:
|
|
|
|
# GCN: S_SETREG
|
|
|
|
# GCN-NEXT: S_SETREG
|
|
|
|
|
|
|
|
name: s_setreg
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
S_SETREG_B32 %sgpr0, 1
|
|
|
|
S_SETREG_B32 %sgpr1, 1
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
S_SETREG_B32 %sgpr0, 64
|
|
|
|
S_SETREG_B32 %sgpr1, 128
|
|
|
|
S_BRANCH %bb.2
|
|
|
|
|
|
|
|
bb.2:
|
|
|
|
S_SETREG_B32 %sgpr0, 1
|
|
|
|
S_SETREG_B32 %sgpr1, 0
|
|
|
|
S_ENDPGM
|
|
|
|
...
|
2016-10-28 07:05:31 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# GCN-LABEL: name: vmem_gt_8dw_store
|
|
|
|
|
|
|
|
# vmem_gt_8dw_store bb.0: after each buffer store the input overwrites %vgpr3
# with V_MOV_B32. An S_NOP is checked where the CIVI prefix is enabled (hawaii,
# fiji, gfx900) after the DWORDX3, DWORDX4, FORMAT_XYZ and FORMAT_XYZW stores
# whose third operand is the literal 0. The single-dword store and the DWORDX4
# store that passes %sgpr4 as third operand expect V_MOV_B32 on the very next
# line on every target.
# NOTE(review): the input block also ends with a
# BUFFER_ATOMIC_CMPSWAP_X2_OFFSET / V_MOV_B32 pair that no check line in this
# group covers; confirm whether that omission is intentional.
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: BUFFER_STORE_DWORD_OFFSET
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: BUFFER_STORE_DWORDX3_OFFSET
|
|
|
|
# CIVI: S_NOP
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: BUFFER_STORE_DWORDX4_OFFSET
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: BUFFER_STORE_DWORDX4_OFFSET
|
|
|
|
# CIVI: S_NOP
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET
|
|
|
|
# CIVI: S_NOP
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET
|
|
|
|
# CIVI: S_NOP
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: FLAT_STORE_DWORDX2
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: FLAT_STORE_DWORDX3
|
|
|
|
# CIVI: S_NOP
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: FLAT_STORE_DWORDX4
|
|
|
|
# CIVI: S_NOP
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: FLAT_ATOMIC_CMPSWAP_X2
|
|
|
|
# CIVI: S_NOP
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
|
|
|
|
# CIVI: S_NOP
|
|
|
|
# GCN: V_MOV_B32
|
|
|
|
|
|
|
|
name: vmem_gt_8dw_store
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
BUFFER_STORE_DWORD_OFFSET %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
|
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
|
|
|
BUFFER_STORE_DWORDX3_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
|
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
|
|
|
BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
|
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
|
|
|
BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
|
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
|
|
|
BUFFER_STORE_FORMAT_XYZ_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
|
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
|
|
|
BUFFER_STORE_FORMAT_XYZW_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
|
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
|
|
|
BUFFER_ATOMIC_CMPSWAP_X2_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit %exec
|
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
2017-06-12 23:55:58 +08:00
|
|
|
FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
|
2016-10-28 07:05:31 +08:00
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
2017-06-12 23:55:58 +08:00
|
|
|
FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
|
2016-10-28 07:05:31 +08:00
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
2017-06-12 23:55:58 +08:00
|
|
|
FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
|
2016-10-28 07:05:31 +08:00
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
2017-06-12 23:55:58 +08:00
|
|
|
FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
|
2016-10-28 07:05:31 +08:00
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
2017-06-12 23:55:58 +08:00
|
|
|
FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
|
2016-10-28 07:05:31 +08:00
|
|
|
%vgpr3 = V_MOV_B32_e32 0, implicit %exec
|
|
|
|
S_ENDPGM
|
|
|
|
|
|
|
|
...
|
2016-10-28 07:42:29 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
|
|
|
|
# GCN-LABEL: name: readwrite_lane
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: V_ADD_I32
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: V_READLANE_B32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: V_ADD_I32
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: V_WRITELANE_B32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.2:
|
|
|
|
# GCN: V_ADD_I32
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: V_READLANE_B32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.3:
|
|
|
|
# GCN: V_ADD_I32
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: S_NOP
|
|
|
|
# GCN: V_WRITELANE_B32
|
|
|
|
|
|
|
|
name: readwrite_lane
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
%vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec
|
|
|
|
%sgpr4 = V_READLANE_B32 %vgpr4, %sgpr0
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
%vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec
|
|
|
|
%vgpr4 = V_WRITELANE_B32 %sgpr0, %sgpr0
|
|
|
|
S_BRANCH %bb.2
|
|
|
|
|
|
|
|
bb.2:
|
|
|
|
%vgpr0,implicit %vcc = V_ADD_I32_e32 %vgpr1, %vgpr2, implicit %vcc, implicit %exec
|
|
|
|
%sgpr4 = V_READLANE_B32 %vgpr4, %vcc_lo
|
|
|
|
S_BRANCH %bb.3
|
|
|
|
|
|
|
|
bb.3:
|
|
|
|
%vgpr0,implicit %vcc = V_ADD_I32_e32 %vgpr1, %vgpr2, implicit %vcc, implicit %exec
|
|
|
|
%vgpr4 = V_WRITELANE_B32 %sgpr4, %vcc_lo
|
|
|
|
S_ENDPGM
|
|
|
|
|
|
|
|
...
|
2016-10-28 07:50:21 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
|
|
|
|
# GCN-LABEL: name: rfe
|
|
|
|
|
|
|
|
# rfe bb.0: S_SETREG_B32 with immediate 3 followed by S_RFE_B64. An S_NOP is
# checked only where the VI prefix is enabled (fiji, gfx900). On the other
# targets S_RFE_B64 must be on the line right after S_SETREG.
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: S_SETREG
|
|
|
|
# VI: S_NOP
|
|
|
|
# GCN-NEXT: S_RFE_B64
|
|
|
|
|
|
|
|
# rfe bb.1: same sequence as bb.0 but S_SETREG_B32 uses immediate 0. Here
# S_RFE_B64 must directly follow S_SETREG on every target, with no S_NOP.
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: S_SETREG
|
|
|
|
# GCN-NEXT: S_RFE_B64
|
|
|
|
|
|
|
|
name: rfe
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
S_SETREG_B32 %sgpr0, 3
|
|
|
|
S_RFE_B64 %sgpr2_sgpr3
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
S_SETREG_B32 %sgpr0, 0
|
|
|
|
S_RFE_B64 %sgpr2_sgpr3
|
|
|
|
S_ENDPGM
|
|
|
|
|
|
|
|
...
|
2017-02-19 02:29:53 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
|
|
|
|
# GCN-LABEL: name: s_mov_fed_b32
|
|
|
|
|
|
|
|
# s_mov_fed_b32 bb.0: %sgpr0 is written by S_MOV_FED_B32 and then read by
# S_MOV_B32. An S_NOP is checked only where the GFX9 prefix is enabled
# (gfx900). On the other targets S_MOV_B32 must be on the very next line.
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: S_MOV_FED_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: S_MOV_B32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: S_MOV_FED_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: V_MOV_B32
|
|
|
|
name: s_mov_fed_b32
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
%sgpr0 = S_MOV_FED_B32 %sgpr0
|
|
|
|
%sgpr0 = S_MOV_B32 %sgpr0
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
%sgpr0 = S_MOV_FED_B32 %sgpr0
|
|
|
|
%vgpr0 = V_MOV_B32_e32 %sgpr0, implicit %exec
|
|
|
|
S_ENDPGM
|
|
|
|
|
|
|
|
...
|
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
|
|
|
|
# GCN-LABEL: name: s_movrel
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: S_MOV_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: S_MOVRELS_B32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: S_MOV_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: S_MOVRELS_B64
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.2:
|
|
|
|
# GCN: S_MOV_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: S_MOVRELD_B32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.3:
|
|
|
|
# GCN: S_MOV_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: S_MOVRELD_B64
|
|
|
|
|
|
|
|
name: s_movrel
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
%m0 = S_MOV_B32 0
|
|
|
|
%sgpr0 = S_MOVRELS_B32 %sgpr0, implicit %m0
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
%m0 = S_MOV_B32 0
|
|
|
|
%sgpr0_sgpr1 = S_MOVRELS_B64 %sgpr0_sgpr1, implicit %m0
|
|
|
|
S_BRANCH %bb.2
|
|
|
|
|
|
|
|
bb.2:
|
|
|
|
%m0 = S_MOV_B32 0
|
|
|
|
%sgpr0 = S_MOVRELD_B32 %sgpr0, implicit %m0
|
|
|
|
S_BRANCH %bb.3
|
|
|
|
|
|
|
|
bb.3:
|
|
|
|
%m0 = S_MOV_B32 0
|
|
|
|
%sgpr0_sgpr1 = S_MOVRELD_B64 %sgpr0_sgpr1, implicit %m0
|
|
|
|
S_ENDPGM
|
|
|
|
...
|
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
|
|
|
|
# GCN-LABEL: name: v_interp
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.0:
|
|
|
|
# GCN: S_MOV_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: V_INTERP_P1_F32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.1:
|
|
|
|
# GCN: S_MOV_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: V_INTERP_P2_F32
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.2:
|
|
|
|
# GCN: S_MOV_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: V_INTERP_P1_F32_16bank
|
|
|
|
|
|
|
|
# GCN-LABEL: bb.3:
|
|
|
|
# GCN: S_MOV_B32
|
|
|
|
# GFX9: S_NOP
|
|
|
|
# GCN-NEXT: V_INTERP_MOV_F32
|
|
|
|
|
|
|
|
name: v_interp
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
%m0 = S_MOV_B32 0
|
|
|
|
%vgpr0 = V_INTERP_P1_F32 %vgpr0, 0, 0, implicit %m0, implicit %exec
|
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
%m0 = S_MOV_B32 0
|
|
|
|
%vgpr0 = V_INTERP_P2_F32 %vgpr0, %vgpr1, 0, 0, implicit %m0, implicit %exec
|
|
|
|
S_BRANCH %bb.2
|
|
|
|
|
|
|
|
bb.2:
|
|
|
|
%m0 = S_MOV_B32 0
|
|
|
|
%vgpr0 = V_INTERP_P1_F32_16bank %vgpr0, 0, 0, implicit %m0, implicit %exec
|
|
|
|
S_BRANCH %bb.3
|
|
|
|
|
|
|
|
bb.3:
|
|
|
|
%m0 = S_MOV_B32 0
|
|
|
|
%vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit %m0, implicit %exec
|
|
|
|
S_ENDPGM
|
|
|
|
...
|
2017-08-04 09:09:43 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
|
|
|
|
# GCN-LABEL: name: dpp
|
|
|
|
|
|
|
|
# dpp bb.0: checked only where the VI prefix is enabled (fiji, gfx900).
# %vgpr0 is written by V_MOV_B32_e32 and then used as a source of
# V_MOV_B32_dpp. The chain of next-line checks requires exactly two S_NOP 0
# lines between them.
# VI-LABEL: bb.0:
|
|
|
|
# VI: V_MOV_B32_e32
|
|
|
|
# VI-NEXT: S_NOP 0
|
|
|
|
# VI-NEXT: S_NOP 0
|
|
|
|
# VI-NEXT: V_MOV_B32_dpp
|
|
|
|
|
|
|
|
# dpp bb.1: checked only where the VI prefix is enabled (fiji, gfx900).
# V_CMPX_EQ_I32_e32 implicitly defines exec (and vcc) in the input. The chain
# of next-line checks requires exactly five S_NOP 0 lines before the
# following V_MOV_B32_dpp.
# VI-LABEL: bb.1:
|
|
|
|
# VI: V_CMPX_EQ_I32_e32
|
|
|
|
# VI-NEXT: S_NOP 0
|
|
|
|
# VI-NEXT: S_NOP 0
|
|
|
|
# VI-NEXT: S_NOP 0
|
|
|
|
# VI-NEXT: S_NOP 0
|
|
|
|
# VI-NEXT: S_NOP 0
|
|
|
|
# VI-NEXT: V_MOV_B32_dpp
|
|
|
|
|
|
|
|
name: dpp
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
%vgpr0 = V_MOV_B32_e32 0, implicit %exec
|
[AMDGPU] Add pseudo "old" source to all DPP instructions
Summary:
All instructions with the DPP modifier may not write to certain lanes of
the output if bound_ctrl=1 is set or any bits in bank_mask or row_mask
aren't set, so the destination register may be both defined and modified.
The right way to handle this is to add a constraint that the destination
register is the same as one of the inputs. We could tie the destination
to the first source, but that would be too restrictive for some use-cases
where we want the destination to be some other value before the
instruction executes. Instead, add a fake "old" source and tie it to the
destination. Effectively, the "old" source defines what value unwritten
lanes will get. We'll expose this functionality to users with a new
intrinsic later.
Also, we want to use DPP instructions for computing derivatives, which
means we need to set WQM for them. We also need to enable the entire
wavefront when using DPP intrinsics to implement nonuniform subgroup
reductions, since otherwise we'll get incorrect results in some cases.
To accommodate this, add a new operand to all DPP instructions which will
be interpreted by the SI WQM pass. This will be exposed with a new
intrinsic later. We'll also add support for Whole Wavefront Mode later.
I also fixed llvm.amdgcn.mov.dpp to overwrite the source and fixed up
the test. However, I could also keep the old behavior (where lanes that
aren't written are undefined) if people want it.
Reviewers: tstellar, arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Differential Revision: https://reviews.llvm.org/D34716
llvm-svn: 310283
2017-08-08 03:10:56 +08:00
|
|
|
%vgpr1 = V_MOV_B32_dpp %vgpr1, %vgpr0, 0, 15, 15, 0, implicit %exec
|
2017-08-04 09:09:43 +08:00
|
|
|
S_BRANCH %bb.1
|
|
|
|
|
|
|
|
bb.1:
|
|
|
|
implicit %exec, implicit %vcc = V_CMPX_EQ_I32_e32 %vgpr0, %vgpr1, implicit %exec
|
[AMDGPU] Add pseudo "old" source to all DPP instructions
Summary:
All instructions with the DPP modifier may not write to certain lanes of
the output if bound_ctrl=1 is set or any bits in bank_mask or row_mask
aren't set, so the destination register may be both defined and modified.
The right way to handle this is to add a constraint that the destination
register is the same as one of the inputs. We could tie the destination
to the first source, but that would be too restrictive for some use-cases
where we want the destination to be some other value before the
instruction executes. Instead, add a fake "old" source and tie it to the
destination. Effectively, the "old" source defines what value unwritten
lanes will get. We'll expose this functionality to users with a new
intrinsic later.
Also, we want to use DPP instructions for computing derivatives, which
means we need to set WQM for them. We also need to enable the entire
wavefront when using DPP intrinsics to implement nonuniform subgroup
reductions, since otherwise we'll get incorrect results in some cases.
To accommodate this, add a new operand to all DPP instructions which will
be interpreted by the SI WQM pass. This will be exposed with a new
intrinsic later. We'll also add support for Whole Wavefront Mode later.
I also fixed llvm.amdgcn.mov.dpp to overwrite the source and fixed up
the test. However, I could also keep the old behavior (where lanes that
aren't written are undefined) if people want it.
Reviewers: tstellar, arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Differential Revision: https://reviews.llvm.org/D34716
llvm-svn: 310283
2017-08-08 03:10:56 +08:00
|
|
|
%vgpr3 = V_MOV_B32_dpp %vgpr3, %vgpr0, 0, 15, 15, 0, implicit %exec
|
2017-08-04 09:09:43 +08:00
|
|
|
S_ENDPGM
|
|
|
|
...
|
2017-02-19 02:29:53 +08:00
|
|
|
---
|
|
|
|
name: mov_fed_hazard_crash_on_dbg_value
|
|
|
|
alignment: 0
|
|
|
|
exposesReturnsTwice: false
|
|
|
|
legalized: false
|
|
|
|
regBankSelected: false
|
|
|
|
selected: false
|
|
|
|
tracksRegLiveness: true
|
|
|
|
liveins:
|
|
|
|
- { reg: '%sgpr4_sgpr5' }
|
|
|
|
- { reg: '%sgpr6_sgpr7' }
|
|
|
|
- { reg: '%sgpr9' }
|
|
|
|
- { reg: '%sgpr0_sgpr1_sgpr2_sgpr3' }
|
|
|
|
frameInfo:
|
|
|
|
isFrameAddressTaken: false
|
|
|
|
isReturnAddressTaken: false
|
|
|
|
hasStackMap: false
|
|
|
|
hasPatchPoint: false
|
|
|
|
stackSize: 16
|
|
|
|
offsetAdjustment: 0
|
|
|
|
maxAlignment: 8
|
|
|
|
adjustsStack: false
|
|
|
|
hasCalls: false
|
|
|
|
maxCallFrameSize: 0
|
|
|
|
hasOpaqueSPAdjustment: false
|
|
|
|
hasVAStart: false
|
|
|
|
hasMustTailInVarArgFunc: false
|
|
|
|
stack:
|
|
|
|
- { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 }
|
|
|
|
- { id: 1, offset: 8, size: 4, alignment: 4 }
|
|
|
|
body: |
|
|
|
|
bb.0.entry:
|
|
|
|
liveins: %sgpr4_sgpr5, %sgpr6_sgpr7, %sgpr9, %sgpr0_sgpr1_sgpr2_sgpr3
|
|
|
|
|
|
|
|
%flat_scr_lo = S_ADD_U32 %sgpr6, %sgpr9, implicit-def %scc
|
|
|
|
%flat_scr_hi = S_ADDC_U32 %sgpr7, 0, implicit-def %scc, implicit %scc
|
|
|
|
DBG_VALUE _, 2, !5, !11, debug-location !12
|
|
|
|
%sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
|
|
|
|
dead %sgpr6_sgpr7 = KILL %sgpr4_sgpr5
|
|
|
|
%sgpr8 = S_MOV_B32 %sgpr5
|
|
|
|
%vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec
|
|
|
|
BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 4, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr + 4)
|
|
|
|
%sgpr8 = S_MOV_B32 %sgpr4, implicit killed %sgpr4_sgpr5
|
|
|
|
%vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec
|
|
|
|
BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr)
|
|
|
|
S_ENDPGM
|
|
|
|
|
|
|
|
...
|