RegAllocGreedy: Fix illegal eviction assert for urgent evictions

The condition in canEvictInterferenceBasedOnCost is slightly different
from the assertion in evictInterference.
canEvictInterferenceBasedOnCost uses a <= check for the cascade number
for legality, but the assert was checking for <. For equal cascade
numbers for an urgent eviction, canEvictInterferenceBasedOnCost could
return success. The actual eviction would then hit this assert. Avoid
ever returning true for equivalent cascade numbers.

The resulting failed allocation seems a bit off to me. e.g. in
illegal-eviction-assert.mir, I would assume %0 gets allocated starting
at $vgpr0. That was its initial allocation choice, but was later
evicted. In this example no evictions can help improve anything.
This commit is contained in:
Matt Arsenault 2022-03-29 08:48:21 -04:00
parent f6462a26f0
commit d4b1be20f6
3 changed files with 70 additions and 1 deletions

View File

@ -236,7 +236,10 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
MRI->getRegClass(Intf->reg())));
// Only evict older cascades or live ranges without a cascade.
unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
if (Cascade <= IntfCascade) {
if (Cascade == IntfCascade)
return false;
if (Cascade < IntfCascade) {
if (!Urgent)
return false;
// We permit breaking cascades for urgent evictions. It should be the

View File

@ -0,0 +1,37 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,1 -o - 2>%t.err %s | FileCheck %s
# RUN: FileCheck -check-prefix=ERR %s < %t.err
# This testcase cannot be compiled. An attempted eviction legality
# check was inconsistent with a later assertion when the eviction was
# performed.
# ERR: error: ran out of registers during register allocation
--- |
define void @foo() #0 {
ret void
}
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
...
# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
---
name: foo
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128, implicit-def %3:vreg_128, implicit-def %4:vreg_128
S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
S_ENDPGM 0
...

View File

@ -0,0 +1,29 @@
; RUN: not llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -o - %s 2>%t.err | FileCheck %s
; RUN: FileCheck -check-prefix=ERR %s < %t.err
; This testcase would fail on an "illegal eviction". If the assert was
; relaxed to allow equivalent cascade numbers, it would infinite loop.
; ERR: error: inline assembly requires more registers than available
; ERR: error: inline assembly requires more registers than available
%asm.output = type { <16 x i32>, <8 x i32>, <5 x i32>, <4 x i32>, <16 x i32> }
; CHECK-LABEL: {{^}}illegal_eviction_assert:
; CHECK: ; def v[4:19] v[20:27] v[0:4] v[0:3] a[0:15]
; CHECK: ; clobber
; CHECK: ; use v[4:19] v[20:27] v[0:4] v[0:3] a[1:16]
; Reproducer for the register allocator: one inline asm defines five wide
; values, a second inline asm clobbers v[0:31] while those values are still
; live, and a third inline asm uses all five. The RUN line invokes llc under
; `not`, so compilation of this function is expected to fail.
; The %arg parameter is not referenced in the body.
define void @illegal_eviction_assert(<32 x i32> addrspace(1)* %arg) #0 {
;%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
; Outputs 0-3 use the "=v" constraint with types <16 x i32>, <8 x i32>,
; <5 x i32> and <4 x i32>; output 4 is a <16 x i32> pinned to a[0:15].
%asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,={a[0:15]}"()
; Unpack the individual results from the aggregate returned by the asm.
%vgpr0 = extractvalue %asm.output %asm, 0
%vgpr1 = extractvalue %asm.output %asm, 1
%vgpr2 = extractvalue %asm.output %asm, 2
%vgpr3 = extractvalue %asm.output %asm, 3
%agpr0 = extractvalue %asm.output %asm, 4
; Clobber v[0:31] between the def and the use of the values above.
; NOTE(review): presumably this is what makes the "v" operands unallocatable
; under the waves-per-eu limit set by attribute #0 - confirm against the target.
call void asm sideeffect "; clobber", "~{v[0:31]}"()
; The last input is constrained to a[1:16], which differs from the a[0:15]
; range the value was defined in.
call void asm sideeffect "; use $0 $1 $2 $3 $4","v,v,v,v,{a[1:16]}"(<16 x i32> %vgpr0, <8 x i32> %vgpr1, <5 x i32> %vgpr2, <4 x i32> %vgpr3, <16 x i32> %agpr0)
ret void
}
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }