2017-12-05 00:22:32 +08:00
|
|
|
# Exercises the SI SDWA peephole on two subtargets (fiji and gfx900). Both
# invocations begin the llc pipeline immediately before si-peephole-sdwa, run
# the machine verifier, and share one set of FileCheck expectations.
# RUN: llc -march=amdgcn -mcpu=fiji -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
|
|
|
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
|
|
|
|
|
|
|
|
# Expected output for add_f16_u32_preserve: both loaded dwords feed an SDWA
# multiply and an SDWA add directly, with the sub-dword selection carried by
# the dst_sel / src0_sel / src1_sel fields of those two instructions.
# SDWA-LABEL: {{^}}add_f16_u32_preserve
|
|
|
|
|
|
|
|
# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
|
|
|
|
# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
|
|
|
|
|
|
|
|
# SDWA: v_mul_f32_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_3
|
|
|
|
# The add uses dst_unused:UNUSED_PRESERVE, so it has to write the register the
# multiply just produced. Reuse RES here rather than rebinding it; a rebinding
# pattern would also accept an unrelated destination register.
# SDWA: v_add_f16_sdwa [[RES]], [[FIRST]], [[SECOND]] dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_1
|
|
|
|
|
|
|
|
# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[RES]]
|
|
|
|
|
|
|
|
---
|
|
|
|
# Input for add_f16_u32_preserve: loads two dwords, forms an f16 add from the
# low 16 bits of the first (AND with 65535) and the high 16 bits of the second
# (right shift by 16) and shifts the sum left by 8; forms an f32 multiply from
# V_BFE_U32 %3, 8, 8 of the first and the second shifted right by 24, and
# shifts the product left by 16; then ORs the two results and stores the dword.
name: add_f16_u32_preserve
|
|
|
|
tracksRegLiveness: true
|
|
|
|
registers:
|
|
|
|
- { id: 0, class: vreg_64 }
|
|
|
|
- { id: 1, class: vreg_64 }
|
|
|
|
- { id: 2, class: sreg_64 }
|
|
|
|
- { id: 3, class: vgpr_32 }
|
|
|
|
- { id: 4, class: vgpr_32 }
|
|
|
|
- { id: 5, class: vgpr_32 }
|
|
|
|
- { id: 6, class: vgpr_32 }
|
|
|
|
- { id: 7, class: vgpr_32 }
|
|
|
|
- { id: 8, class: vgpr_32 }
|
|
|
|
- { id: 9, class: vgpr_32 }
|
|
|
|
- { id: 10, class: vgpr_32 }
|
|
|
|
- { id: 11, class: vgpr_32 }
|
|
|
|
- { id: 12, class: vgpr_32 }
|
|
|
|
- { id: 13, class: vgpr_32 }
|
|
|
|
body: |
|
|
|
|
bb.0:
|
2018-02-01 06:04:26 +08:00
|
|
|
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
|
2017-12-05 00:22:32 +08:00
|
|
|
|
2018-02-01 06:04:26 +08:00
|
|
|
%2 = COPY $sgpr30_sgpr31
|
|
|
|
%1 = COPY $vgpr2_vgpr3
|
|
|
|
%0 = COPY $vgpr0_vgpr1
|
|
|
|
%3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
|
|
|
%4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
|
|
|
|
|
|
|
%5 = V_AND_B32_e32 65535, %3, implicit $exec
|
|
|
|
%6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
|
|
|
|
%7 = V_BFE_U32 %3, 8, 8, implicit $exec
|
|
|
|
%8 = V_LSHRREV_B32_e32 24, %4, implicit $exec
|
|
|
|
|
|
|
|
%9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $exec
|
|
|
|
%10 = V_LSHLREV_B16_e64 8, %9, implicit $exec
|
|
|
|
%11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $exec
|
|
|
|
%12 = V_LSHLREV_B32_e64 16, %11, implicit $exec
|
|
|
|
|
|
|
|
%13 = V_OR_B32_e64 %10, %12, implicit $exec
|
|
|
|
|
|
|
|
FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
|
|
|
|
$sgpr30_sgpr31 = COPY %2
|
|
|
|
S_SETPC_B64_return $sgpr30_sgpr31
|
[AMDGPU] Fix the SDWA Peephole phase to handle src for dst:UNUSED_PRESERVE.
Summary:
The phase attempts to transform operations that extract a portion of a value
into an SDWA src operand in cases where that value is used only once. It
was not prepared for this use to be the preserved portion of a value for
dst_unused:UNUSED_PRESERVE, resulting in a crash or assertion failure.
This change either rejects the illegal SDWA attempt or, in the case where
dst_sel is WORD_1 and the src_sel would be WORD_0, removes the unneeded
extract instruction.
Reviewers: arsenm, #amdgpu
Reviewed By: arsenm, #amdgpu
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D44364
llvm-svn: 328856
2018-03-30 13:03:36 +08:00
|
|
|
|
|
|
|
---
|
|
|
|
# Expected output for sdwa_preserve_keep: the AND with 0xff stays as a separate
# instruction, and its result register is the one the preserving
# v_mov_b32_sdwa writes and the final store reads.
# SDWA-LABEL: sdwa_preserve_keep
|
|
|
|
# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
|
|
|
|
# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
|
|
|
|
|
|
|
|
# SDWA: v_and_b32_e32 [[AND:v[0-9]+]], 0xff, [[FIRST]]
|
|
|
|
# SDWA: v_mov_b32_sdwa [[AND]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
|
|
|
|
|
|
|
|
# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[AND]]
|
|
|
|
|
|
|
|
# Input for sdwa_preserve_keep: masks the first loaded dword with 255 and uses
# that value as the tied (preserved) operand of an already-SDWA V_MOV_B32 of
# the second loaded dword. The V_LSHRREV_B16 result (%9) has no users here.
name: sdwa_preserve_keep
|
|
|
|
tracksRegLiveness: true
|
|
|
|
registers:
|
|
|
|
- { id: 0, class: vreg_64 }
|
|
|
|
- { id: 1, class: vreg_64 }
|
|
|
|
- { id: 2, class: sreg_64 }
|
|
|
|
- { id: 3, class: vgpr_32 }
|
|
|
|
- { id: 4, class: vgpr_32 }
|
|
|
|
- { id: 5, class: sreg_32_xm0_xexec }
|
|
|
|
- { id: 6, class: vgpr_32 }
|
|
|
|
- { id: 7, class: vgpr_32 }
|
|
|
|
- { id: 8, class: sreg_32_xm0 }
|
|
|
|
- { id: 9, class: vgpr_32 }
|
|
|
|
- { id: 10, class: sreg_32_xm0 }
|
|
|
|
- { id: 11, class: vgpr_32 }
|
|
|
|
- { id: 17, class: vgpr_32 }
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
|
|
|
|
|
|
|
|
%2 = COPY $sgpr30_sgpr31
|
|
|
|
%1 = COPY $vgpr2_vgpr3
|
|
|
|
%0 = COPY $vgpr0_vgpr1
|
|
|
|
%3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
|
|
|
%4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
|
|
|
|
|
|
|
%9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
|
|
|
|
%10:sreg_32_xm0 = S_MOV_B32 255
|
|
|
|
%11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
|
|
|
|
%17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
|
|
|
|
FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
|
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
|
|
|
S_ENDPGM 0
|
[AMDGPU] Fix the SDWA Peephole phase to handle src for dst:UNUSED_PRESERVE.
Summary:
The phase attempts to transform operations that extract a portion of a value
into an SDWA src operand in cases where that value is used only once. It
was not prepared for this use to be the preserved portion of a value for
dst_unused:UNUSED_PRESERVE, resulting in a crash or assertion failure.
This change either rejects the illegal SDWA attempt or, in the case where
dst_sel is WORD_1 and the src_sel would be WORD_0, removes the unneeded
extract instruction.
Reviewers: arsenm, #amdgpu
Reviewed By: arsenm, #amdgpu
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D44364
llvm-svn: 328856
2018-03-30 13:03:36 +08:00
|
|
|
|
|
|
|
...
|
|
|
|
---
|
|
|
|
# Expected output for sdwa_preserve_remove: the preserving v_mov_b32_sdwa
# writes straight into the first loaded register, which the store then reads.
# SDWA-LABEL: sdwa_preserve_remove
|
|
|
|
# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
|
|
|
|
# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
|
|
|
|
|
|
|
|
# The 0xffff mask from the input must be gone. Without this line, a leftover
# v_and_b32 whose destination was allocated to the same register as FIRST
# would still satisfy the remaining checks.
# SDWA-NOT: v_and_b32
# SDWA: v_mov_b32_sdwa [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
|
|
|
|
|
|
|
|
# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[FIRST]]
|
|
|
|
|
|
|
|
# Input for sdwa_preserve_remove: masks the first loaded dword with 65535 and
# uses that value as the tied (preserved) operand of an already-SDWA V_MOV_B32
# of the second loaded dword. The V_LSHRREV_B16 result (%9) has no users here.
name: sdwa_preserve_remove
|
|
|
|
tracksRegLiveness: true
|
|
|
|
registers:
|
|
|
|
- { id: 0, class: vreg_64 }
|
|
|
|
- { id: 1, class: vreg_64 }
|
|
|
|
- { id: 2, class: sreg_64 }
|
|
|
|
- { id: 3, class: vgpr_32 }
|
|
|
|
- { id: 4, class: vgpr_32 }
|
|
|
|
- { id: 5, class: sreg_32_xm0_xexec }
|
|
|
|
- { id: 6, class: vgpr_32 }
|
|
|
|
- { id: 7, class: vgpr_32 }
|
|
|
|
- { id: 8, class: sreg_32_xm0 }
|
|
|
|
- { id: 9, class: vgpr_32 }
|
|
|
|
- { id: 10, class: sreg_32_xm0 }
|
|
|
|
- { id: 11, class: vgpr_32 }
|
|
|
|
- { id: 17, class: vgpr_32 }
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
|
|
|
|
|
|
|
|
%2 = COPY $sgpr30_sgpr31
|
|
|
|
%1 = COPY $vgpr2_vgpr3
|
|
|
|
%0 = COPY $vgpr0_vgpr1
|
|
|
|
%3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
|
|
|
%4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
|
|
|
|
|
|
|
%9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
|
|
|
|
%10:sreg_32_xm0 = S_MOV_B32 65535
|
|
|
|
%11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
|
|
|
|
%17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
|
|
|
|
FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
|
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
|
|
|
S_ENDPGM 0
|
[AMDGPU] Fix the SDWA Peephole phase to handle src for dst:UNUSED_PRESERVE.
Summary:
The phase attempts to transform operations that extract a portion of a value
into an SDWA src operand in cases where that value is used only once. It
was not prepared for this use to be the preserved portion of a value for
dst_unused:UNUSED_PRESERVE, resulting in a crash or assertion failure.
This change either rejects the illegal SDWA attempt or, in the case where
dst_sel is WORD_1 and the src_sel would be WORD_0, removes the unneeded
extract instruction.
Reviewers: arsenm, #amdgpu
Reviewed By: arsenm, #amdgpu
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D44364
llvm-svn: 328856
2018-03-30 13:03:36 +08:00
|
|
|
|
|
|
|
...
|