2019-09-05 01:12:57 +08:00
|
|
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
|
|
|
|
|
|
|
|
# Test what happens when an SGPR is unavailable for the unused add
|
|
|
|
# carry out when materializing the frame index.
|
|
|
|
|
|
|
|
|
|
|
|
# There are truly no free SGPRs, so the entire frame index expansion
|
|
|
|
# needs to be inverted to restore the original frame register.
|
|
|
|
|
|
|
|
---
# Test function for the case where the body leaves no SGPR free: it is a
# non-entry function with two stack objects that the body refers to as
# %stack.0 and %stack.1.
|
|
|
|
name: scavenge_sgpr_pei_no_sgprs
|
|
|
|
tracksRegLiveness: true
|
|
|
|
|
|
|
|
# Two 4-byte stack objects, each requesting 8192-byte alignment.
stack:
|
|
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
|
|
|
|
# Fixes the scratch resource, frame offset and stack pointer offset registers
# for this function; $sgpr0-$sgpr3, $sgpr32 and $sgpr33 are therefore reserved
# by these settings rather than by the body.
machineFunctionInfo:
|
|
|
|
isEntryFunction: false
|
|
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
2020-03-04 09:39:47 +08:00
|
|
|
frameOffsetReg: $sgpr33
|
2019-09-05 01:12:57 +08:00
|
|
|
stackPtrOffsetReg: $sgpr32
|
|
|
|
|
|
|
|
# Single basic block; only $vgpr1 is live on entry.
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr1
|
|
|
|
|
|
|
|
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: liveins: $vgpr1, $vgpr2
|
|
|
|
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
2021-05-20 10:25:51 +08:00
|
|
|
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
|
|
|
; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
2021-02-03 00:08:57 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-11-10 08:40:35 +08:00
|
|
|
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $vgpr3 = COPY killed $sgpr33
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
|
|
|
; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
|
2021-05-20 10:25:51 +08:00
|
|
|
; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: S_ENDPGM 0, implicit $vcc
|
2021-04-21 22:32:00 +08:00
|
|
|
; Define every register from $sgpr4 through $sgpr31, plus $vcc. The V_OR below
; reads all of those SGPRs and S_ENDPGM reads $vcc, so each of them stays live
; across both frame index uses.
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-11-10 08:40:35 +08:00
|
|
|
; First frame index use: %stack.0 is the source of a plain move.
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
|
2021-04-21 22:32:00 +08:00
|
|
|
; Second frame index use: %stack.1 is an operand of an OR with $vgpr1. The
; implicit uses keep $sgpr4-$sgpr31 live up to this instruction.
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
2019-09-05 01:12:57 +08:00
|
|
|
; The implicit use keeps $vcc live to the end of the block.
S_ENDPGM 0, implicit $vcc
|
|
|
|
...
|
|
|
|
|
|
|
|
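# One 32-bit SGPR is available for the intermediate scale computation,
|
|
|
|
# so only an extra copy to VALU is necessary.
# NOTE(review): the autogenerated checks for this function show the free SGPR
# ($sgpr29) being used to save $sgpr33, with the frame index still computed in
# and then restored from $sgpr33 -- confirm whether the description above is
# still accurate.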
|
|
|
|
|
|
|
|
---
# Test function for the case where the body leaves exactly one 32-bit SGPR
# undefined: it is a non-entry function with two stack objects that the body
# refers to as %stack.0 and %stack.1.
|
|
|
|
name: scavenge_sgpr_pei_one_sgpr
|
|
|
|
tracksRegLiveness: true
|
|
|
|
|
|
|
|
# Two 4-byte stack objects, each requesting 8192-byte alignment.
stack:
|
|
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
|
|
|
|
# Fixes the scratch resource, frame offset and stack pointer offset registers
# for this function; $sgpr0-$sgpr3, $sgpr32 and $sgpr33 are therefore reserved
# by these settings rather than by the body.
machineFunctionInfo:
|
|
|
|
isEntryFunction: false
|
|
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
2020-03-04 09:39:47 +08:00
|
|
|
frameOffsetReg: $sgpr33
|
2019-09-05 01:12:57 +08:00
|
|
|
stackPtrOffsetReg: $sgpr32
|
|
|
|
|
|
|
|
# Single basic block; only $vgpr1 is live on entry.
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr1
|
|
|
|
|
|
|
|
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: liveins: $sgpr29, $vgpr1
|
|
|
|
; CHECK: $sgpr29 = frame-setup COPY $sgpr33
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
2021-02-03 00:08:57 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-11-10 08:40:35 +08:00
|
|
|
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $vgpr2 = COPY killed $sgpr33
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
|
|
|
|
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $sgpr33 = frame-destroy COPY $sgpr29
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: S_ENDPGM 0, implicit $vcc
|
2021-04-21 22:32:00 +08:00
|
|
|
; Define $sgpr4-$sgpr28, $sgpr30, $sgpr31 and $vcc. $sgpr29 is deliberately
; absent from this list, so it is the one SGPR this body never defines or uses.
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-11-10 08:40:35 +08:00
|
|
|
; First frame index use: %stack.0 is the source of a plain move.
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
|
2021-04-21 22:32:00 +08:00
|
|
|
; Second frame index use: %stack.1 is an operand of an OR with $vgpr1. The
; implicit uses keep $sgpr4-$sgpr28 and $sgpr31 live up to this instruction
; ($sgpr30 is defined above but not read here).
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
|
2019-09-05 01:12:57 +08:00
|
|
|
; The implicit use keeps $vcc live to the end of the block.
S_ENDPGM 0, implicit $vcc
|
|
|
|
...
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
|
|
|
|
# When only one 64-bit SGPR is available for the unused carry out pre gfx9,
|
|
|
|
# we must reuse one of the 32-bit SGPR sub-regs to materialize the offset.
|
|
|
|
|
|
|
|
---
# Test function for the case where the body leaves one 64-bit SGPR pair
# undefined: it is a non-entry function with two stack objects that the body
# refers to as %stack.0 and %stack.1.
|
|
|
|
name: scavenge_sgpr_pei_one_sgpr_64
|
|
|
|
tracksRegLiveness: true
|
|
|
|
|
|
|
|
# Two 4-byte stack objects, each requesting 8192-byte alignment.
stack:
|
|
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
|
|
|
|
# Fixes the scratch resource, frame offset and stack pointer offset registers
# for this function; $sgpr0-$sgpr3, $sgpr32 and $sgpr33 are therefore reserved
# by these settings rather than by the body.
machineFunctionInfo:
|
|
|
|
isEntryFunction: false
|
|
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
2020-03-04 09:39:47 +08:00
|
|
|
frameOffsetReg: $sgpr33
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
stackPtrOffsetReg: $sgpr32
|
|
|
|
|
|
|
|
# Single basic block; only $vgpr1 is live on entry.
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr1
|
|
|
|
|
|
|
|
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: liveins: $sgpr28, $vgpr1
|
|
|
|
; CHECK: $sgpr28 = frame-setup COPY $sgpr33
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
2021-02-03 00:08:57 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-11-10 08:40:35 +08:00
|
|
|
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr29 = S_ADD_I32 killed $sgpr29, 8192, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $vgpr2 = COPY killed $sgpr29
|
|
|
|
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: S_ENDPGM 0, implicit $vcc
|
2021-04-21 22:32:00 +08:00
|
|
|
; Define $sgpr4-$sgpr27, $sgpr30, $sgpr31 and $vcc. $sgpr28 and $sgpr29 are
; deliberately absent from this list, so that pair is never defined or used by
; this body.
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-11-10 08:40:35 +08:00
|
|
|
; First frame index use: %stack.0 is the source of a plain move.
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
|
2021-04-21 22:32:00 +08:00
|
|
|
; Second frame index use: %stack.1 is an operand of an OR with $vgpr1. The
; implicit uses keep $sgpr4-$sgpr27 and $sgpr31 live up to this instruction
; ($sgpr30 is defined above but not read here).
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; The implicit use keeps $vcc live to the end of the block.
S_ENDPGM 0, implicit $vcc
|
|
|
|
...
|
|
|
|
|
|
|
|
# Prefer to use vcc as unused carry out.
|
|
|
|
|
|
|
|
---
# Test function for the case where the body never touches $vcc: it is a
# non-entry function with two stack objects that the body refers to as
# %stack.0 and %stack.1.
|
|
|
|
name: scavenge_sgpr_pei_prefer_vcc
|
|
|
|
tracksRegLiveness: true
|
|
|
|
|
|
|
|
# Two 4-byte stack objects, each requesting 8192-byte alignment.
stack:
|
|
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
|
|
|
|
# Fixes the scratch resource, frame offset and stack pointer offset registers
# for this function; $sgpr0-$sgpr3, $sgpr32 and $sgpr33 are therefore reserved
# by these settings rather than by the body.
machineFunctionInfo:
|
|
|
|
isEntryFunction: false
|
|
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
2020-03-04 09:39:47 +08:00
|
|
|
frameOffsetReg: $sgpr33
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
stackPtrOffsetReg: $sgpr32
|
|
|
|
|
|
|
|
# Single basic block; only $vgpr1 is live on entry.
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr1
|
|
|
|
|
|
|
|
; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: liveins: $sgpr28, $vgpr1
|
|
|
|
; CHECK: $sgpr28 = frame-setup COPY $sgpr33
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
|
2021-02-03 00:08:57 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
|
2020-11-10 08:40:35 +08:00
|
|
|
; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
2020-03-19 04:47:39 +08:00
|
|
|
; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $vcc_lo = S_MOV_B32 8192
|
2020-07-14 21:18:36 +08:00
|
|
|
; CHECK: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
|
2021-06-07 22:09:48 +08:00
|
|
|
; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
|
2021-04-21 22:32:00 +08:00
|
|
|
; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: S_ENDPGM 0
|
2021-04-21 22:32:00 +08:00
|
|
|
; Define $sgpr4-$sgpr27, $sgpr30 and $sgpr31. Unlike the other functions in
; this file, $vcc is not defined here and is not read by S_ENDPGM below;
; $sgpr28 and $sgpr29 are also absent from this list.
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
|
2020-11-10 08:40:35 +08:00
|
|
|
; First frame index use: %stack.0 is the source of a plain move.
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
|
2021-04-21 22:32:00 +08:00
|
|
|
; Second frame index use: %stack.1 is an operand of an OR with $vgpr1. The
; implicit uses keep $sgpr4-$sgpr27 and $sgpr31 live up to this instruction
; ($sgpr30 is defined above but not read here).
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; No implicit $vcc use here, so $vcc is not live anywhere in this function.
S_ENDPGM 0
|
|
|
|
...
|