2019-09-05 01:12:57 +08:00
|
|
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
|
|
|
|
|
|
|
|
# Test what happens when an SGPR is unavailable for the unused add
|
|
|
|
# carry out when materializing the frame index.
|
|
|
|
|
|
|
|
|
|
|
|
# There are truly no free SGPRs, so the entire frame index expansion
|
|
|
|
# needs to be inverted to restore the original frame register.
|
|
|
|
|
|
|
|
---
|
|
|
|
name: scavenge_sgpr_pei_no_sgprs
|
|
|
|
tracksRegLiveness: true
|
|
|
|
|
|
|
|
stack:
|
|
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
|
|
|
|
machineFunctionInfo:
|
|
|
|
isEntryFunction: false
|
|
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
2020-03-04 09:39:47 +08:00
|
|
|
frameOffsetReg: $sgpr33
|
2019-09-05 01:12:57 +08:00
|
|
|
stackPtrOffsetReg: $sgpr32
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr1
|
|
|
|
|
|
|
|
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
|
|
|
|
; CHECK: liveins: $vgpr1
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
|
|
|
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
|
|
|
|
; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc
|
|
|
|
; CHECK: $vgpr2 = COPY killed $sgpr33
|
|
|
|
; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc
|
|
|
|
; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
|
|
|
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup COPY $sgpr27
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: S_ENDPGM 0, implicit $vcc
|
|
|
|
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
|
|
|
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
|
|
|
|
S_ENDPGM 0, implicit $vcc
|
|
|
|
...
|
|
|
|
|
|
|
|
# One 32-bit SGPR is available for the intermediate scale computation,
|
|
|
|
# so only an extra copy to VALU is necessary.
|
|
|
|
|
|
|
|
---
|
|
|
|
name: scavenge_sgpr_pei_one_sgpr
|
|
|
|
tracksRegLiveness: true
|
|
|
|
|
|
|
|
stack:
|
|
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
|
|
|
|
machineFunctionInfo:
|
|
|
|
isEntryFunction: false
|
|
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
2020-03-04 09:39:47 +08:00
|
|
|
frameOffsetReg: $sgpr33
|
2019-09-05 01:12:57 +08:00
|
|
|
stackPtrOffsetReg: $sgpr32
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr1
|
|
|
|
|
|
|
|
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
|
|
|
|
; CHECK: liveins: $vgpr1
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
|
|
|
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: $sgpr29 = S_ADD_U32 killed $sgpr29, 8192, implicit-def $scc
|
|
|
|
; CHECK: $vgpr2 = COPY killed $sgpr29
|
|
|
|
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr31
|
|
|
|
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup COPY $sgpr27
|
2019-09-05 01:12:57 +08:00
|
|
|
; CHECK: S_ENDPGM 0, implicit $vcc
|
|
|
|
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
|
|
|
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr31
|
|
|
|
S_ENDPGM 0, implicit $vcc
|
|
|
|
...
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
|
|
|
|
# When only one 64-bit SGPR is available for the unused carry out pre gfx9,
|
|
|
|
# we must reuse one of the 32-bit SGPR sub-regs to materialize the offset.
|
|
|
|
|
|
|
|
---
|
|
|
|
name: scavenge_sgpr_pei_one_sgpr_64
|
|
|
|
tracksRegLiveness: true
|
|
|
|
|
|
|
|
stack:
|
|
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
|
|
|
|
machineFunctionInfo:
|
|
|
|
isEntryFunction: false
|
|
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
2020-03-04 09:39:47 +08:00
|
|
|
frameOffsetReg: $sgpr33
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
stackPtrOffsetReg: $sgpr32
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr1
|
|
|
|
|
|
|
|
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
|
|
|
|
; CHECK: liveins: $vgpr1
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
|
|
|
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
2020-03-19 04:47:39 +08:00
|
|
|
; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $sgpr28 = S_MOV_B32 8192
|
2020-03-19 04:47:39 +08:00
|
|
|
; CHECK: $vgpr2, dead $sgpr28_sgpr29 = V_ADD_I32_e64 killed $sgpr28, killed $vgpr2, 0, implicit $exec
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31
|
|
|
|
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup COPY $sgpr27
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: S_ENDPGM 0, implicit $vcc
|
|
|
|
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
|
|
|
|
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31
|
|
|
|
S_ENDPGM 0, implicit $vcc
|
|
|
|
...
|
|
|
|
|
|
|
|
# Prefer to use vcc as unused carry out.
|
|
|
|
|
|
|
|
---
|
|
|
|
name: scavenge_sgpr_pei_prefer_vcc
|
|
|
|
tracksRegLiveness: true
|
|
|
|
|
|
|
|
stack:
|
|
|
|
- { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
- { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }
|
|
|
|
|
|
|
|
machineFunctionInfo:
|
|
|
|
isEntryFunction: false
|
|
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
2020-03-04 09:39:47 +08:00
|
|
|
frameOffsetReg: $sgpr33
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
stackPtrOffsetReg: $sgpr32
|
|
|
|
|
|
|
|
body: |
|
|
|
|
bb.0:
|
|
|
|
liveins: $vgpr1
|
|
|
|
|
|
|
|
; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
|
|
|
|
; CHECK: liveins: $vgpr1
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr27 = frame-setup COPY $sgpr33
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
|
|
|
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31
|
2020-03-19 04:47:39 +08:00
|
|
|
; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $vcc_lo = S_MOV_B32 8192
|
2020-03-19 04:47:39 +08:00
|
|
|
; CHECK: $vgpr2, dead $vcc = V_ADD_I32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31
|
|
|
|
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
2020-03-04 09:39:47 +08:00
|
|
|
; CHECK: $sgpr33 = frame-setup COPY $sgpr27
|
AMDGPU: Reuse carry out register during FI elimination
Summary:
Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add.
If only one SGPR was available this crashed when trying to scavenge another
32bit SGPR to materialize the offset.
Instead, reuse a 32-bit SGPR from the carry out as the offset register.
Also prefer to use vcc for the unused carry out when it is available.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70614
2019-11-23 04:25:13 +08:00
|
|
|
; CHECK: S_ENDPGM 0
|
|
|
|
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31
|
|
|
|
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31
|
|
|
|
S_ENDPGM 0
|
|
|
|
...
|