From 6a87e0fc6ab4f71603ccfa5b61394c1fe6f6a44d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Jun 2019 20:35:18 +0000 Subject: [PATCH] [AMDGPU] Fix Livereg computation during epilogue insertion The LivePhysRegs set computed to find a scratch register in the epilogue code wrongly uses the block's live-ins. It should use the block's live-outs instead. The liveness computation now also steps backward over the terminator (return) instruction, which is the insertion point for the scratch-exec-copy instruction, so that the terminator's operands are taken into account. Patch by Christudasan Devadasan llvm-svn: 364470 --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 3 ++- llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 098152b23aac..3ed6d75c045d 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -678,7 +678,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, if (ScratchExecCopy == AMDGPU::NoRegister) { // See emitPrologue LivePhysRegs LiveRegs(*ST.getRegisterInfo()); - LiveRegs.addLiveIns(MBB); + LiveRegs.addLiveOuts(MBB); + LiveRegs.stepBackward(*MBBI); ScratchExecCopy = findScratchNonCalleeSaveRegister(MF, LiveRegs, diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll index 5368eb20300b..ba099c33fc2b 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -489,6 +489,7 @@ define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 { ; GCN-NOT: s[8:9] ; GCN-NOT: s[10:11] ; GCN-NOT: s[12:13] +; GCN: s_or_saveexec_b64 s[6:7], -1 define hidden void @func_indirect_use_every_sgpr_input() #1 { call void @use_every_sgpr_input() ret void