RegAlloc: Fix remaining virtual registers after allocation failure

This testcase fails register allocation, but at the failure point
there were also newly created split virtual registers. Previously,
the allocator assigned the failing register and continued without
enqueueing the new split virtual registers. They were then never
allocated, which triggered an assertion in VirtRegRewriter.
This commit is contained in:
Matt Arsenault 2022-03-26 11:01:53 -04:00
parent 2a6669060f
commit 1732242bee
2 changed files with 48 additions and 4 deletions

View File

@ -140,10 +140,7 @@ void RegAllocBase::allocatePhysRegs() {
// Keep going after reporting the error.
VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front());
continue;
}
if (AvailablePhysReg)
} else if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);
for (Register Reg : SplitVRegs) {

View File

@ -0,0 +1,47 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>%t.err | FileCheck %s
; RUN: FileCheck -check-prefix=ERR %s < %t.err
; This testcase fails register allocation at the same time it performs
; virtual register splitting (by introducing VGPR to AGPR copies). We
; still need to enqueue and allocate the newly split vregs after the
; failure.
; ERR: error: ran out of registers during register allocation
; ERR-NEXT: error: ran out of registers during register allocation
; ERR-NEXT: error: ran out of registers during register allocation
; ERR-NOT: ERROR
; CHECK: v_accvgpr_write_b32
; CHECK: v_accvgpr_write_b32
; CHECK: v_accvgpr_write_b32
; CHECK: v_accvgpr_write_b32
; CHECK: v_accvgpr_write_b32
; CHECK: v_accvgpr_write_b32
; CHECK: v_accvgpr_write_b32
; CHECK: v_accvgpr_read_b32
; CHECK: v_accvgpr_read_b32
; CHECK: v_accvgpr_read_b32
; CHECK: v_accvgpr_read_b32
; CHECK: v_accvgpr_read_b32
; CHECK: v_accvgpr_read_b32
; CHECK: v_accvgpr_read_b32
; Kernel that drives the allocation-failure-with-split-vregs scenario described
; in the header comments of this test. It takes two float arguments that are
; passed as the first two operands of both MFMA intrinsic calls below, and it
; uses attribute group #0.
define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1) #0 {
; Produce a float from side-effecting inline asm whose output constraint names a0.
; NOTE(review): the output constraint is spelled "=${a0}" while the input
; constraint further down is spelled "{a1}" -- confirm the '$' is intentional.
%agpr0 = call float asm sideeffect "; def $0", "=${a0}"()
; Place the asm result in element 0 of an otherwise undef 16 x float vector.
%agpr.vec = insertelement <16 x float> undef, float %agpr0, i32 0
; First MFMA: the partially defined vector is the accumulator (third) operand.
%mfma0 = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr.vec, i32 0, i32 0, i32 0)
; Element 3 of the first result is extracted here but not referenced again in
; this function.
%mfma0.3 = extractelement <16 x float> %mfma0, i32 3
; Overwrite element 8 of the first MFMA result with the asm-defined value.
%insert = insertelement <16 x float> %mfma0, float %agpr0, i32 8
; Second MFMA: same scalar operands, with the modified vector as accumulator.
%mfma1 = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %insert, i32 0, i32 0, i32 0)
; Extract element 3 of the second result so it can be consumed by inline asm.
%mfma1.3 = extractelement <16 x float> %mfma1, i32 3
; Consume the extracted element through inline asm with an input constraint
; naming a1.
call void asm sideeffect "; use $0", "{a1}"(float %mfma1.3)
ret void
}
; MFMA intrinsic called twice by the kernel above: two float operands, a
; 16 x float vector operand, and three i32 immediate (immarg) operands.
declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
; NOTE(review): this declaration is not called by the kernel above; it and
; attribute group #2 look like leftovers -- confirm before removing.
declare i32 @llvm.amdgcn.workitem.id.x() #2
; #0 is applied to the kernel and sets "amdgpu-waves-per-eu" to "10,10".
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
; #1 is applied to the MFMA intrinsic declaration.
attributes #1 = { convergent nounwind readnone willreturn }
; #2 is applied to the workitem.id.x intrinsic declaration.
attributes #2 = { nounwind readnone speculatable willreturn }