llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
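/// \file
/// Implements SIMachineFunctionInfo, the per-function state kept by the SI
/// target, including the assignment of spill slots to VGPR lanes.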
//===----------------------------------------------------------------------===//


#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;


// Pin the vtable to this file.
// anchor() is intentionally empty: it exists only so that this translation
// unit holds an out-of-line member definition for the class.
void SIMachineFunctionInfo::anchor() {}

/// Create the SI function info for \p MF.
///
/// \p MF is forwarded to the AMDGPUMachineFunction base class. All members
/// initialized here start in their "nothing recorded yet" state: TIDReg is
/// AMDGPU::NoRegister, HasSpilledVGPRs is false, and PSInputAddr,
/// NumUserSGPRs and LDSWaveSpillSize are zero.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    HasSpilledVGPRs(false),
    PSInputAddr(0),
    NumUserSGPRs(0),
    LDSWaveSpillSize(0) { }

/// Map one 4-byte slot of a spill stack object to a (VGPR, lane) pair.
///
/// The slot's byte offset is the frame offset of \p FrameIndex plus
/// 4 * \p SubIdx. Offsets are grouped into chunks of 64 lanes * 4 bytes; each
/// chunk is backed by one VGPR, and the lane is the dword index inside that
/// chunk. The VGPR for a chunk is picked on first use and cached in LaneVGPRs
/// so that later queries for the same chunk return the same register.
///
/// \param MF         Function whose frame, register and subtarget info are
///                   consulted; its blocks gain the new VGPR as a live-in.
/// \param FrameIndex Frame index of the spill stack object.
/// \param SubIdx     Index of the 32-bit sub-slot inside the stack object.
/// \returns The VGPR and lane for the slot. If no unused VGPR is available,
///          an error is reported through the function's LLVMContext and the
///          returned VGPR is AMDGPU::NoRegister.
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
                                                       MachineFunction *MF,
                                                       unsigned FrameIndex,
                                                       unsigned SubIdx) {
  const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

  // One VGPR holds 64 lanes of 4 bytes each.
  unsigned LaneVGPRIdx = Offset / (64 * 4);
  unsigned Lane = (Offset / 4) % 64;

  struct SpilledReg Spill;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
    // The result is cached even on failure so the error below is reported
    // only once per chunk.
    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    if (LaneVGPR == AMDGPU::NoRegister) {
      // No free VGPR: report it instead of silently registering the
      // NoRegister sentinel as a live-in of every block.
      MF->getFunction()->getContext().emitError(
          "Ran out of VGPRs for spilling SGPR");
    } else {
      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
           BI != BE; ++BI) {
        BI->addLiveIn(LaneVGPR);
      }
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  Spill.Lane = Lane;
  return Spill;
}

unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
                                              const MachineFunction &MF) const {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  // FIXME: We should get this information from kernel attributes if it
  // is available.
  return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
}
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`/// \file`
			`//===----------------------------------------------------------------------===//`


			`#include "SIMachineFunctionInfo.h"`
R600/SI: Implement VGPR register spilling for compute at -O0 v3 VGPRs are spilled to LDS. This still needs more testing, but we need to at least enable it at -O0, because the fast register allocator spills all registers that are live at the end of blocks and without this some future commits will break the flat-address-space.ll test. v2: Only calculate thread id once v3: Move insertion of spill instructions to SIRegisterInfo::eliminateFrameIndex() llvm-svn: 218348 2014-09-24 09:33:17 +08:00			`#include "AMDGPUSubtarget.h"`
R600/SI: Only create one instruction when spilling/restoring register v3 The register spiller assumes that only one new instruction is created when spilling and restoring registers, so we need to emit pseudo instructions for vector register spills and lower them after register allocation. v2: - Fix calculation of lane index - Extend VGPR liveness to end of program. v3: - Use SIMM16 field of S_NOP to specify multiple NOPs. https://bugs.freedesktop.org/show_bug.cgi?id=75005 llvm-svn: 207843 2014-05-02 23:41:42 +08:00			`#include "SIInstrInfo.h"`
R600/SI: Implement VGPR register spilling for compute at -O0 v3 VGPRs are spilled to LDS. This still needs more testing, but we need to at least enable it at -O0, because the fast register allocator spills all registers that are live at the end of blocks and without this some future commits will break the flat-address-space.ll test. v2: Only calculate thread id once v3: Move insertion of spill instructions to SIRegisterInfo::eliminateFrameIndex() llvm-svn: 218348 2014-09-24 09:33:17 +08:00			`#include "llvm/CodeGen/MachineInstrBuilder.h"`
R600/SI: Use eliminateFrameIndex() to expand SGPR spill pseudos This will simplify the SGPR spilling and also allow us to use MachineFrameInfo for calculating offsets, which should be more reliable than our custom code. This fixes a crash in some cases where a register would be spilled in a branch such that the VGPR defined for spilling did not dominate all the uses when restoring. This fixes a crash in an ocl conformance test. The test requries register spilling and is too big to include. llvm-svn: 216217 2014-08-22 04:40:54 +08:00			`#include "llvm/CodeGen/MachineFrameInfo.h"`
R600/SI: Implement spilling of SGPRs v5 SGPRs are spilled into VGPRs using the {READ,WRITE}LANE_B32 instructions. v2: - Fix encoding of Lane Mask - Use correct register flags, so we don't overwrite the low dword when restoring multi-dword registers. v3: - Register spilling seems to hang the GPU, so replace all shaders that need spilling with a dummy shader. v4: - Fix *LANE definitions - Change destination reg class for 32-bit SMRD instructions v5: - Remove small optimization that was crashing Serious Sam 3. https://bugs.freedesktop.org/show_bug.cgi?id=68224 https://bugs.freedesktop.org/show_bug.cgi?id=71285 NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195880 2013-11-28 05:23:35 +08:00			`#include "llvm/CodeGen/MachineRegisterInfo.h"`
R600/SI: Only create one instruction when spilling/restoring register v3 The register spiller assumes that only one new instruction is created when spilling and restoring registers, so we need to emit pseudo instructions for vector register spills and lower them after register allocation. v2: - Fix calculation of lane index - Extend VGPR liveness to end of program. v3: - Use SIMM16 field of S_NOP to specify multiple NOPs. https://bugs.freedesktop.org/show_bug.cgi?id=75005 llvm-svn: 207843 2014-05-02 23:41:42 +08:00			`#include "llvm/IR/Function.h"`
			`#include "llvm/IR/LLVMContext.h"`
R600/SI: Implement spilling of SGPRs v5 SGPRs are spilled into VGPRs using the {READ,WRITE}LANE_B32 instructions. v2: - Fix encoding of Lane Mask - Use correct register flags, so we don't overwrite the low dword when restoring multi-dword registers. v3: - Register spilling seems to hang the GPU, so replace all shaders that need spilling with a dummy shader. v4: - Fix *LANE definitions - Change destination reg class for 32-bit SMRD instructions v5: - Remove small optimization that was crashing Serious Sam 3. https://bugs.freedesktop.org/show_bug.cgi?id=68224 https://bugs.freedesktop.org/show_bug.cgi?id=71285 NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195880 2013-11-28 05:23:35 +08:00
			`#define MAX_LANES 64`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00
			`using namespace llvm;`

[weak vtables] Remove a bunch of weak vtables This patch removes most of the trivial cases of weak vtables by pinning them to a single object file. The memory leaks in this version have been fixed. Thanks Alexey for pointing them out. Differential Revision: http://llvm-reviews.chandlerc.com/D2068 Reviewed by Andy llvm-svn: 195064 2013-11-19 08:57:56 +08:00
			`// Pin the vtable to this file.`
			`void SIMachineFunctionInfo::anchor() {}`

Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)`
R600/SI: Share code recording ShaderTypeAttribute between generations llvm-svn: 178504 2013-04-02 05:47:53 +08:00			`: AMDGPUMachineFunction(MF),`
R600/SI: Implement VGPR register spilling for compute at -O0 v3 VGPRs are spilled to LDS. This still needs more testing, but we need to at least enable it at -O0, because the fast register allocator spills all registers that are live at the end of blocks and without this some future commits will break the flat-address-space.ll test. v2: Only calculate thread id once v3: Move insertion of spill instructions to SIRegisterInfo::eliminateFrameIndex() llvm-svn: 218348 2014-09-24 09:33:17 +08:00			`TIDReg(AMDGPU::NoRegister),`
R600/SI: Spill VGPRs to scratch space for compute shaders llvm-svn: 225988 2015-01-14 23:42:31 +08:00			`HasSpilledVGPRs(false),`
R600/SI: Implement spilling of SGPRs v5 SGPRs are spilled into VGPRs using the {READ,WRITE}LANE_B32 instructions. v2: - Fix encoding of Lane Mask - Use correct register flags, so we don't overwrite the low dword when restoring multi-dword registers. v3: - Register spilling seems to hang the GPU, so replace all shaders that need spilling with a dummy shader. v4: - Fix *LANE definitions - Change destination reg class for 32-bit SMRD instructions v5: - Remove small optimization that was crashing Serious Sam 3. https://bugs.freedesktop.org/show_bug.cgi?id=68224 https://bugs.freedesktop.org/show_bug.cgi?id=71285 NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195880 2013-11-28 05:23:35 +08:00			`PSInputAddr(0),`
R600/SI: Implement VGPR register spilling for compute at -O0 v3 VGPRs are spilled to LDS. This still needs more testing, but we need to at least enable it at -O0, because the fast register allocator spills all registers that are live at the end of blocks and without this some future commits will break the flat-address-space.ll test. v2: Only calculate thread id once v3: Move insertion of spill instructions to SIRegisterInfo::eliminateFrameIndex() llvm-svn: 218348 2014-09-24 09:33:17 +08:00			`NumUserSGPRs(0),`
			`LDSWaveSpillSize(0) { }`
R600/SI: Use eliminateFrameIndex() to expand SGPR spill pseudos This will simplify the SGPR spilling and also allow us to use MachineFrameInfo for calculating offsets, which should be more reliable than our custom code. This fixes a crash in some cases where a register would be spilled in a branch such that the VGPR defined for spilling did not dominate all the uses when restoring. This fixes a crash in an ocl conformance test. The test requries register spilling and is too big to include. llvm-svn: 216217 2014-08-22 04:40:54 +08:00
			`SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(`
			`MachineFunction *MF,`
			`unsigned FrameIndex,`
			`unsigned SubIdx) {`
			`const MachineFrameInfo *FrameInfo = MF->getFrameInfo();`
Remove a few more calls to TargetMachine::getSubtarget from the R600 port. llvm-svn: 229804 2015-02-19 09:10:55 +08:00			`const SIRegisterInfo TRI = static_cast<const SIRegisterInfo >(`
			`MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());`
R600/SI: Use eliminateFrameIndex() to expand SGPR spill pseudos This will simplify the SGPR spilling and also allow us to use MachineFrameInfo for calculating offsets, which should be more reliable than our custom code. This fixes a crash in some cases where a register would be spilled in a branch such that the VGPR defined for spilling did not dominate all the uses when restoring. This fixes a crash in an ocl conformance test. The test requries register spilling and is too big to include. llvm-svn: 216217 2014-08-22 04:40:54 +08:00			`MachineRegisterInfo &MRI = MF->getRegInfo();`
			`int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);`
			`Offset += SubIdx * 4;`

			`unsigned LaneVGPRIdx = Offset / (64 * 4);`
			`unsigned Lane = (Offset / 4) % 64;`

			`struct SpilledReg Spill;`

			`if (!LaneVGPRs.count(LaneVGPRIdx)) {`
R600/SI: Spill VGPRs to scratch space for compute shaders llvm-svn: 225988 2015-01-14 23:42:31 +08:00			`unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);`
R600/SI: Use eliminateFrameIndex() to expand SGPR spill pseudos This will simplify the SGPR spilling and also allow us to use MachineFrameInfo for calculating offsets, which should be more reliable than our custom code. This fixes a crash in some cases where a register would be spilled in a branch such that the VGPR defined for spilling did not dominate all the uses when restoring. This fixes a crash in an ocl conformance test. The test requries register spilling and is too big to include. llvm-svn: 216217 2014-08-22 04:40:54 +08:00			`LaneVGPRs[LaneVGPRIdx] = LaneVGPR;`

			`// Add this register as live-in to all blocks to avoid machine verifer`
			`// complaining about use of an undefined physical register.`
			`for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();`
			`BI != BE; ++BI) {`
			`BI->addLiveIn(LaneVGPR);`
			`}`
			`}`

			`Spill.VGPR = LaneVGPRs[LaneVGPRIdx];`
			`Spill.Lane = Lane;`
			`return Spill;`
R600/SI: Implement spilling of SGPRs v5 SGPRs are spilled into VGPRs using the {READ,WRITE}LANE_B32 instructions. v2: - Fix encoding of Lane Mask - Use correct register flags, so we don't overwrite the low dword when restoring multi-dword registers. v3: - Register spilling seems to hang the GPU, so replace all shaders that need spilling with a dummy shader. v4: - Fix *LANE definitions - Change destination reg class for 32-bit SMRD instructions v5: - Remove small optimization that was crashing Serious Sam 3. https://bugs.freedesktop.org/show_bug.cgi?id=68224 https://bugs.freedesktop.org/show_bug.cgi?id=71285 NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195880 2013-11-28 05:23:35 +08:00			`}`
R600/SI: Implement VGPR register spilling for compute at -O0 v3 VGPRs are spilled to LDS. This still needs more testing, but we need to at least enable it at -O0, because the fast register allocator spills all registers that are live at the end of blocks and without this some future commits will break the flat-address-space.ll test. v2: Only calculate thread id once v3: Move insertion of spill instructions to SIRegisterInfo::eliminateFrameIndex() llvm-svn: 218348 2014-09-24 09:33:17 +08:00
			`unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(`
			`const MachineFunction &MF) const {`
Remove a few more calls to TargetMachine::getSubtarget from the R600 port. llvm-svn: 229804 2015-02-19 09:10:55 +08:00			`const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();`
R600/SI: Implement VGPR register spilling for compute at -O0 v3 VGPRs are spilled to LDS. This still needs more testing, but we need to at least enable it at -O0, because the fast register allocator spills all registers that are live at the end of blocks and without this some future commits will break the flat-address-space.ll test. v2: Only calculate thread id once v3: Move insertion of spill instructions to SIRegisterInfo::eliminateFrameIndex() llvm-svn: 218348 2014-09-24 09:33:17 +08:00			`// FIXME: We should get this information from kernel attributes if it`
			`// is available.`
			`return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();`
			`}`