forked from OSchip/llvm-project
[AMDGPU] Fix AGPR offset for waitcnt
An enum value stores the offset between the AGPR and VGPR ranges in the internal storage of SIInsertWaitcnts. It was set to 226 when it should have been 256, causing part of the two ranges to overlap. That overlap in turn causes 'aliasing' between registers, potentially inserting waitcnts that are not required. Reviewed By: rampitec. Differential Revision: https://reviews.llvm.org/D119749
This commit is contained in:
parent
8c06061372
commit
c87c61c52c
|
@ -119,7 +119,7 @@ static const unsigned WaitEventMaskForInst[NUM_INST_CNTS] = {
|
|||
// special tokens like SCMEM_LDS (needed for buffer load to LDS).
|
||||
enum RegisterMapping {
|
||||
SQ_MAX_PGM_VGPRS = 512, // Maximum programmable VGPRs across all targets.
|
||||
- AGPR_OFFSET = 226, // Maximum programmable ArchVGPRs across all targets.
|
||||
+ AGPR_OFFSET = 256, // Maximum programmable ArchVGPRs across all targets.
|
||||
SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets.
|
||||
NUM_EXTRA_VGPRS = 1, // A reserved slot for DS.
|
||||
EXTRA_VGPR_LDS = 0, // This is a placeholder the Shader algorithm uses.
|
||||
|
|
|
@ -335,7 +335,6 @@ body: |
|
|||
|
||||
---
|
||||
# agpr should be disjoint and tracked separately from vgpr
|
||||
- # vgpr226 and agpr0 erroneously share waitcnt storage index, so a waitcnt is inserted before store of agpr0 when it is not needed
|
||||
|
||||
name: high_register_collision
|
||||
|
||||
|
@ -347,7 +346,6 @@ body: |
|
|||
; GCN-NEXT: $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
|
||||
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
- ; GCN-NEXT: S_WAITCNT 112
|
||||
; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
|
||||
|
|
Loading…
Reference in New Issue