diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0c40735b7542..1779e746bad3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -614,7 +614,8 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
   // Registers in the sequence are allocated contiguously so we can just
   // use register number to pick one of three round-robin temps.
   unsigned RegNo = DestReg % 3;
-  Register Tmp = AMDGPU::VGPR32;
+  Register Tmp =
+      MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getVGPRForAGPRCopy();
   assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
          "VGPR used for an intermediate copy should have been reserved.");
 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index d5b5360dacee..80575f36fa02 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -187,6 +187,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
   if (!S.empty())
     S.consumeInteger(0, GDSSize);
+
+  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
+  // VGPR available at all times.
+  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+    VGPRForAGPRCopy = AMDGPU::VGPR_32RegClass.getRegister(32);
+  }
 }
 
 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 62d9ebe66e93..1254fbe4fb74 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -493,6 +493,20 @@ private:
   // frame, so save it here and add it to the RegScavenger later.
   Optional<int> ScavengeFI;
 
+private:
+  /// Scratch VGPR used as the intermediate register when copying or spilling
+  /// AGPRs. Only assigned on subtargets with MAI but without GFX90A insts.
+  Register VGPRForAGPRCopy;
+
+public:
+  /// Returns the VGPR set aside for AGPR copies; asserts that one has been
+  /// assigned.
+  Register getVGPRForAGPRCopy() const {
+    assert(VGPRForAGPRCopy &&
+           "Valid VGPR for AGPR copy must have been identified by now");
+    return VGPRForAGPRCopy;
+  }
+
 public: // FIXME
   /// If this is set, an SGPR used for save/restore of the register used for the
   /// frame pointer.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 9321828299e1..d12d5ccb8f3f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -698,7 +698,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
   // VGPR available at all times.
   if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
-    reserveRegisterTuples(Reserved, AMDGPU::VGPR32);
+    reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
   }
 
   for (auto Reg : MFI->WWMReservedRegs) {
@@ -1553,8 +1553,8 @@ void SIRegisterInfo::buildSpillLoadStore(
       assert(EltSize == 4);
 
       if (!TmpIntermediateVGPR) {
-        assert(MF->getRegInfo().isReserved(AMDGPU::VGPR32));
-        TmpIntermediateVGPR = AMDGPU::VGPR32;
+        TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
+        assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
       }
       if (IsStore) {
         auto AccRead = BuildMI(MBB, MI, DL,