// llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

//===----------------------- SIFrameLowering.cpp --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "AMDGPUSubtarget.h"

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;


// Returns a view of the leading entries of AMDGPU::SGPR_128RegClass. The
// number of entries is the subtarget's maximum SGPR count for MF divided by
// four (integer division).
static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
                                         const MachineFunction &MF) {
  const auto *FirstTuple = AMDGPU::SGPR_128RegClass.begin();
  const unsigned NumTuples = ST.getMaxNumSGPRs(MF) / 4;
  return makeArrayRef(FirstTuple, NumTuples);
}

// Returns a view of the leading entries of AMDGPU::SGPR_32RegClass, one entry
// per SGPR up to the subtarget's maximum SGPR count for MF.
static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
                                       const MachineFunction &MF) {
  const auto *FirstReg = AMDGPU::SGPR_32RegClass.begin();
  const unsigned NumRegs = ST.getMaxNumSGPRs(MF);
  return makeArrayRef(FirstReg, NumRegs);
}

void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
                                          MachineFunction &MF,
                                          MachineBasicBlock &MBB) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo* TRI = &TII->getRegisterInfo();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  unsigned FlatScratchInitReg
    = TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
  unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
  const SISubtarget &ST,
  const SIInstrInfo *TII,
  const SIRegisterInfo *TRI,
  SIMachineFunctionInfo *MFI,
  MachineFunction &MF) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // We need to insert initialization of the scratch resource descriptor.
  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
  if (ScratchRsrcReg == AMDGPU::NoRegister ||
      !MRI.isPhysRegUsed(ScratchRsrcReg))
    return AMDGPU::NoRegister;

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  // We find the resource first because it has an alignment requirement.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

// Shift down registers reserved for the scratch wave offset and stack pointer
// SGPRs.
std::pair<unsigned, unsigned>
SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
  const SISubtarget &ST,
  const SIInstrInfo *TII,
  const SIRegisterInfo *TRI,
  SIMachineFunctionInfo *MFI,
  MachineFunction &MF) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  // No replacement necessary.
  if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
      !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
    assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister);
    return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
  }

  unsigned SPReg = MFI->getStackPtrOffsetReg();
  if (ST.hasSGPRInitBug())
    return std::make_pair(ScratchWaveOffsetReg, SPReg);

  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();

  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
  if (NumPreloaded > AllSGPRs.size())
    return std::make_pair(ScratchWaveOffsetReg, SPReg);

  AllSGPRs = AllSGPRs.slice(NumPreloaded);

  // We need to drop register from the end of the list that we cannot use
  // for the scratch wave offset.
  // + 2 s102 and s103 do not exist on VI.
  // + 2 for vcc
  // + 2 for xnack_mask
  // + 2 for flat_scratch
  // + 4 for registers reserved for scratch resource register
  // + 1 for register reserved for scratch wave offset.  (By exluding this
  //     register from the list to consider, it means that when this
  //     register is being used for the scratch wave offset and there
  //     are no other free SGPRs, then the value will stay in this register.
  // + 1 if stack pointer is used.
  // ----
  //  13 (+1)
  unsigned ReservedRegCount = 13;

  if (AllSGPRs.size() < ReservedRegCount)
    return std::make_pair(ScratchWaveOffsetReg, SPReg);

  bool HandledScratchWaveOffsetReg =
    ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);

  for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
    // Pick the first unallocated SGPR. Be careful not to pick an alias of the
    // scratch descriptor, since we haven’t added its uses yet.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
      if (!HandledScratchWaveOffsetReg) {
        HandledScratchWaveOffsetReg = true;

        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
        MFI->setScratchWaveOffsetReg(Reg);
        ScratchWaveOffsetReg = Reg;
        break;
      }
    }
  }

  return std::make_pair(ScratchWaveOffsetReg, SPReg);
}

// Emit the prologue of an entry function into MBB, which must be the first
// block of MF (asserted below). In the order performed by the code, this:
//   - emits the debugger prologue when the subtarget requests it,
//   - initializes flat_scratch when the function has stack objects and a
//     flat scratch init input,
//   - initializes the stack pointer register, when one is assigned, from the
//     scratch wave offset register,
//   - lets the getReserved* helpers choose the final scratch resource and
//     scratch wave offset registers,
//   - re-adds the preloaded inputs as live-ins and copies them into the
//     chosen registers, or builds the scratch resource descriptor with
//     S_MOV_B32 / S_MOV_B64 / S_LOAD_DWORDX2 instructions.
// Instructions are inserted at the start of MBB with an unknown debug
// location.
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
  // specified.
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  auto AMDGPUASI = ST.getAMDGPUAS();
  if (ST.debuggerEmitPrologue())
    emitDebuggerPrologue(MF, MBB);

  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If we only have SGPR spills, we won't actually be using scratch memory
  // since these spill to VGPRs.
  //
  // FIXME: We should be cleaning up these unused SGPR spill frame indices
  // somewhere.

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // We need to do the replacement of the private segment buffer and wave offset
  // register even if there are no stack objects. There could be stores to undef
  // or a constant without an associated object.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
    emitFlatScratchInit(ST, MF, MBB);

  // Initialize the stack pointer register, if any: a plain copy of the scratch
  // wave offset when the frame is empty, otherwise the wave offset plus the
  // frame size multiplied by the wavefront size.
  unsigned SPReg = MFI->getStackPtrOffsetReg();
  if (SPReg != AMDGPU::NoRegister) {
    DebugLoc DL;
    int64_t StackSize = MF.getFrameInfo().getStackSize();

    if (StackSize == 0) {
      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
        .addReg(MFI->getScratchWaveOffsetReg());
    } else {
      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
        .addReg(MFI->getScratchWaveOffsetReg())
        .addImm(StackSize * ST.getWavefrontSize());
    }
  }

  // Pick the final registers. These helpers may rewrite existing uses of the
  // reserved registers (via MRI.replaceRegWith) and update MFI.
  unsigned ScratchRsrcReg
    = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);

  unsigned ScratchWaveOffsetReg;
  std::tie(ScratchWaveOffsetReg, SPReg)
    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);

  // It's possible to have uses of only ScratchWaveOffsetReg without
  // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
  // but the inverse is not true.
  if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
    assert(ScratchRsrcReg == AMDGPU::NoRegister);
    return;
  }

  // We need to insert initialization of the scratch resource descriptor.
  unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
    MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // The private segment buffer input is only queried for code object v2 and
  // Mesa graphics shaders; otherwise it stays NoRegister.
  unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
  if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
    PreloadedPrivateBufferReg = TRI->getPreloadedValue(
      MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
  }

  bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
  bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
                         MRI.isPhysRegUsed(ScratchRsrcReg);

  // We added live-ins during argument lowering, but since they were not used
  // they were deleted. We're adding the uses now, so add them back.
  if (OffsetRegUsed) {
    assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
           "scratch wave offset input is required");
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
    assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
    MRI.addLiveIn(PreloadedPrivateBufferReg);
    MBB.addLiveIn(PreloadedPrivateBufferReg);
  }

  // Make the register selected live throughout the function.
  for (MachineBasicBlock &OtherBB : MF) {
    if (&OtherBB == &MBB)
      continue;

    if (OffsetRegUsed)
      OtherBB.addLiveIn(ScratchWaveOffsetReg);

    if (ResourceRegUsed)
      OtherBB.addLiveIn(ScratchRsrcReg);
  }

  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // If we reserved the original input registers, we don't need to copy to the
  // reserved registers.

  // The buffer is copied only for code object v2, and only when the chosen
  // register differs from the preloaded one.
  bool CopyBuffer = ResourceRegUsed &&
    PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
    ST.isAmdCodeObjectV2(MF) &&
    ScratchRsrcReg != PreloadedPrivateBufferReg;

  // This needs to be careful of the copying order to avoid overwriting one of
  // the input registers before it's been copied to its final
  // destination. Usually the offset should be copied first.
  // The buffer goes first when the chosen wave offset register overlaps the
  // preloaded buffer, so the buffer is read before the offset copy writes it.
  bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
                                              ScratchWaveOffsetReg);
  if (CopyBuffer && CopyBufferFirst) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  }

  if (OffsetRegUsed &&
      PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
    // NOTE(review): the flag expression repeats the query that produced
    // OffsetRegUsed, so inside this branch it appears to always select 0
    // rather than RegState::Kill - confirm whether that is intended.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
      .addReg(PreloadedScratchWaveOffsetReg,
              MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
  }

  if (CopyBuffer && !CopyBufferFirst) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  }

  // Without a copied descriptor (Mesa graphics shaders, or no preloaded
  // buffer register at all) the four dwords of the descriptor are built here.
  if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
    assert(!ST.isAmdCodeObjectV2(MF));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasPrivateMemoryInputPtr()) {
      // Dwords 0-1 come from the preloaded input register: moved directly for
      // compute calling conventions, loaded through it otherwise.
      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(PreloadedPrivateBufferReg)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        // The load is described by an invariant, dereferenceable memory
        // operand on an undef pointer in the constant address space.
        PointerType *PtrTy =
          PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
                           AMDGPUASI.CONSTANT_ADDRESS);
        MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
        auto MMO = MF.getMachineMemOperand(PtrInfo,
                                           MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                           0, 0);
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(PreloadedPrivateBufferReg)
          .addImm(0) // offset
          .addImm(0) // glc
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      }
    } else {
      // Dwords 0 and 1 are taken from the external symbols
      // SCRATCH_RSRC_DWORD0 and SCRATCH_RSRC_DWORD1.
      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    }

    // Dwords 2 and 3 are the low and high halves of the 64-bit constant
    // returned by getScratchRsrcWords23().
    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  }
}

// Emits the function prologue into MBB. Only entry functions receive one
// here; for any other function nothing is emitted.
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->isEntryFunction())
    return;

  emitEntryFunctionPrologue(MF, MBB);
}

// Epilogue emission hook. The body is intentionally empty: no instructions
// are emitted into MBB.
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {

}

static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            unsigned &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return MF.getFrameInfo().getObjectOffset(FI);
}

// Runs before frame offsets are finalized. Does nothing when MF has no stack
// objects. Otherwise:
//   1. If SGPR-to-VGPR spilling is enabled and the function spilled SGPRs,
//      tries to lower every SGPR spill instruction to a VGPR spill and then
//      removes the corresponding frame indices.
//   2. If stack usage may remain afterwards, creates a fixed object at
//      offset 0 and registers it with RS as the scavenging frame index.
void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  if (!FrameInfo.hasStackObjects())
    return;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Stays false unless the lowering below runs and succeeds for every spill.
  bool EverySGPRSpillInVGPR = false;

  if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
    EverySGPRSpillInVGPR = true;

    // Handle all SGPR spills before frame offsets are finalized. Ideally
    // SGPRs are spilled to VGPRs, in which case the stack usage can be
    // eliminated.
    //
    // XXX - This operates under the assumption that only other SGPR spills
    // are users of the frame index. I'm not 100% sure this is correct. The
    // StackColoring pass has a comment saying a future improvement would be
    // to merge allocas with spill slots, but for now according to
    // MachineFrameInfo isSpillSlot can't alias any other object.
    for (MachineBasicBlock &Block : MF) {
      for (auto It = Block.begin(), End = Block.end(); It != End;) {
        MachineInstr &Inst = *It;
        // Step past the current instruction before touching it, presumably
        // so that the iteration survives the instruction being rewritten by
        // the elimination call below.
        It = std::next(It);

        if (!TII->isSGPRSpill(Inst))
          continue;

        const int SlotFI =
          TII->getNamedOperand(Inst, AMDGPU::OpName::addr)->getIndex();
        if (!FuncInfo->allocateSGPRSpillToVGPR(MF, SlotFI)) {
          // At least one spill stays in memory.
          EverySGPRSpillInVGPR = false;
          continue;
        }

        bool Lowered = TRI.eliminateSGPRToVGPRSpillFrameIndex(Inst, SlotFI, RS);
        (void)Lowered;
        assert(Lowered && "failed to spill SGPR to VGPR when allocated");
      }
    }

    FuncInfo->removeSGPRToVGPRFrameIndices(FrameInfo);
  }

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  const bool MayStillUseStack =
    FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
    !EverySGPRSpillInVGPR || !allStackObjectsAreDead(FrameInfo);
  if (!MayStillUseStack)
    return;

  assert(RS && "RegScavenger required if spilling");

  // The emergency slot is forced to offset 0 so that no user object ever has
  // 0 as an address, which lets 0 serve as an invalid pointer value. This is
  // because LLVM assumes 0 is an invalid pointer in address space 0. Because
  // alloca is required to be address space 0, we are forced to accept this
  // for now. Ideally the stack could live in another address space with 0 as
  // a valid pointer, and -1 as the null value.
  //
  // This also wastes additional space when user stack objects require > 4
  // byte alignment.
  //
  // The main cost here is losing the offset for addressing modes. However
  // this also ensures we shouldn't need a register for the offset when
  // emergency scavenging.
  const int ScavengeFI = FrameInfo.CreateFixedObject(
    TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  RS->addScavengingFrameIndex(ScavengeFI);
}

// Emits the debugger prologue at the start of MBB: for each of the three
// dimensions, the work group ID SGPR (copied through a fresh virtual VGPR)
// and the work item ID VGPR are stored to their debugger stack objects. Both
// input registers are re-added as live-ins of the function and of MBB.
void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const DebugLoc NoLoc;
  MachineBasicBlock::iterator InsertPt = MBB.begin();

  for (unsigned Dim = 0; Dim != 3; ++Dim) {
    // Work group ID: fetch the SGPR and make it live-in again.
    const unsigned GroupIDSGPR = MFI->getWorkGroupIDSGPR(Dim);
    RegInfo.addLiveIn(GroupIDSGPR);
    MBB.addLiveIn(GroupIDSGPR);

    // SGPRs are spilled into VGPRs, so the work group ID is first copied to
    // a VGPR in order to spill it to scratch.
    const unsigned GroupIDVGPR =
      RegInfo.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(MBB, InsertPt, NoLoc, TII->get(AMDGPU::V_MOV_B32_e32), GroupIDVGPR)
      .addReg(GroupIDSGPR);

    // Store the work group ID to its stack object.
    const int GroupIDSlot = MFI->getDebuggerWorkGroupIDStackObjectIndex(Dim);
    TII->storeRegToStackSlot(MBB, InsertPt, GroupIDVGPR, false,
      GroupIDSlot, &AMDGPU::VGPR_32RegClass, TRI);

    // Work item ID: fetch the VGPR and make it live-in again.
    const unsigned ItemIDVGPR = MFI->getWorkItemIDVGPR(Dim);
    RegInfo.addLiveIn(ItemIDVGPR);
    MBB.addLiveIn(ItemIDVGPR);

    // Store the work item ID to its stack object.
    const int ItemIDSlot = MFI->getDebuggerWorkItemIDStackObjectIndex(Dim);
    TII->storeRegToStackSlot(MBB, InsertPt, ItemIDVGPR, false,
      ItemIDSlot, &AMDGPU::VGPR_32RegClass, TRI);
  }
}
-												AMDGPU: Create emergency stack slots during frame lowering

Test has a bogus verifier error which will be fixed by later commits.

llvm-svn: 252327

											
										
										
											2015-11-07 02:17:45 +08:00
+								//===----------------------- SIFrameLowering.cpp --------------------------===//
 								//
 								//                     The LLVM Compiler Infrastructure
 								//
 								// This file is distributed under the University of Illinois Open Source
 								// License. See LICENSE.TXT for details.
 								//
 								//==-----------------------------------------------------------------------===//
 								#include "SIFrameLowering.h"
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
+								#include "SIInstrInfo.h"
 								#include "SIMachineFunctionInfo.h"
-												AMDGPU: Create emergency stack slots during frame lowering

Test has a bogus verifier error which will be fixed by later commits.

llvm-svn: 252327

											
										
										
											2015-11-07 02:17:45 +08:00
+								#include "SIRegisterInfo.h"
-												AMDGPU: Cleanup subtarget handling.

Split AMDGPUSubtarget into amdgcn/r600 specific subclasses.
This removes most of the static_casting of the basic codegen
classes everywhere, and tries to restrict the features
visible on the wrong target.

llvm-svn: 273652

											
										
										
											2016-06-24 14:30:11 +08:00
+								#include "AMDGPUSubtarget.h"
-												AMDGPU: Create emergency stack slots during frame lowering

Test has a bogus verifier error which will be fixed by later commits.

llvm-svn: 252327

											
										
										
											2015-11-07 02:17:45 +08:00
+								#include "llvm/CodeGen/MachineFrameInfo.h"
 								#include "llvm/CodeGen/MachineFunction.h"
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
+								#include "llvm/CodeGen/MachineInstrBuilder.h"
-												AMDGPU: Create emergency stack slots during frame lowering

Test has a bogus verifier error which will be fixed by later commits.

llvm-svn: 252327

											
										
										
											2015-11-07 02:17:45 +08:00
+								#include "llvm/CodeGen/RegisterScavenging.h"
 								using namespace llvm;
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
-												[AMDGPU] Move register related queries to subtarget class

Differential Revision: https://reviews.llvm.org/D29318

llvm-svn: 294440

											
										
										
											2017-02-08 21:02:33 +08:00
+								static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
 								                                         const MachineFunction &MF) {
-												AMDGPU: Fix assert on ttmp registers

Use register class that does not include them when looking
for unallocated registers.

This is hit by the udiv v8i64 test in the opencl integer
conformance test, and takes a few seconds to compile in
a debug build so no test included.

llvm-svn: 269938

											
										
										
											2016-05-18 23:19:50 +08:00
+								  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
-												[AMDGPU] Move register related queries to subtarget class

Differential Revision: https://reviews.llvm.org/D29318

llvm-svn: 294440

											
										
										
											2017-02-08 21:02:33 +08:00
+								                      ST.getMaxNumSGPRs(MF) / 4);
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								}
-												[AMDGPU] Move register related queries to subtarget class

Differential Revision: https://reviews.llvm.org/D29318

llvm-svn: 294440

											
										
										
											2017-02-08 21:02:33 +08:00
+								static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
 								                                       const MachineFunction &MF) {
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
-												[AMDGPU] Move register related queries to subtarget class

Differential Revision: https://reviews.llvm.org/D29318

llvm-svn: 294440

											
										
										
											2017-02-08 21:02:33 +08:00
+								                      ST.getMaxNumSGPRs(MF));
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								}
-												AMDGPU: Merge initial gfx9 support

llvm-svn: 295554

											
										
										
											2017-02-19 02:29:53 +08:00
+								void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								                                          MachineFunction &MF,
 								                                          MachineBasicBlock &MBB) const {
-												AMDGPU: Merge initial gfx9 support

llvm-svn: 295554

											
										
										
											2017-02-19 02:29:53 +08:00
+								  const SIInstrInfo *TII = ST.getInstrInfo();
 								  const SIRegisterInfo* TRI = &TII->getRegisterInfo();
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  // We don't need this if we only have spills since there is no user facing
 								  // scratch.
 								  // TODO: If we know we don't have flat instructions earlier, we can omit
 								  // this from the input registers.
 								  //
 								  // TODO: We only need to know if we access scratch space through a flat
 								  // pointer. Because we only detect if flat instructions are used at all,
 								  // this will be used more often than necessary on VI.
 								  // Debug location must be unknown since the first debug location is used to
 								  // determine the end of the prologue.
 								  DebugLoc DL;
 								  MachineBasicBlock::iterator I = MBB.begin();
 								  unsigned FlatScratchInitReg
 								    = TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT);
 								  MachineRegisterInfo &MRI = MF.getRegInfo();
 								  MRI.addLiveIn(FlatScratchInitReg);
 								  MBB.addLiveIn(FlatScratchInitReg);
 								  unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
-												AMDGPU: Merge initial gfx9 support

llvm-svn: 295554

											
										
										
											2017-02-19 02:29:53 +08:00
+								  unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
 								  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 								  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
-												AMDGPU: Merge initial gfx9 support

llvm-svn: 295554

											
										
										
											2017-02-19 02:29:53 +08:00
+								  // Do a 64-bit pointer add.
 								  if (ST.flatScratchIsPointer()) {
 								    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
 								      .addReg(FlatScrInitLo)
 								      .addReg(ScratchWaveOffsetReg);
 								    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
 								      .addReg(FlatScrInitHi)
 								      .addImm(0);
 								    return;
 								  }
 								  // Copy the size in bytes.
 								  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
 								    .addReg(FlatScrInitHi, RegState::Kill);
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  // Add wave offset in bytes to private base offset.
 								  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
 								  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
 								    .addReg(FlatScrInitLo)
 								    .addReg(ScratchWaveOffsetReg);
 								  // Convert offset to 256-byte units.
 								  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
 								    .addReg(FlatScrInitLo, RegState::Kill)
 								    .addImm(8);
 								}
 								unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
 								  const SISubtarget &ST,
 								  const SIInstrInfo *TII,
 								  const SIRegisterInfo *TRI,
 								  SIMachineFunctionInfo *MFI,
 								  MachineFunction &MF) const {
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
+								  MachineRegisterInfo &MRI = MF.getRegInfo();
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
 								  // We need to insert initialization of the scratch resource descriptor.
 								  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
+								  if (ScratchRsrcReg == AMDGPU::NoRegister ||
 								      !MRI.isPhysRegUsed(ScratchRsrcReg))
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								    return AMDGPU::NoRegister;
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
 								  if (ST.hasSGPRInitBug() ||
 								      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
 								    return ScratchRsrcReg;
 								  // We reserved the last registers for this. Shift it down to the end of those
 								  // which were actually used.
 								  //
 								  // FIXME: It might be safer to use a pseudoregister before replacement.
 								  // FIXME: We should be able to eliminate unused input registers. We only
 								  // cannot do this for the resources required for scratch access. For now we
 								  // skip over user SGPRs and may leave unused holes.
 								  // We find the resource first because it has an alignment requirement.
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
-												[AMDGPU] Move register related queries to subtarget class

Differential Revision: https://reviews.llvm.org/D29318

llvm-svn: 294440

											
										
										
											2017-02-08 21:02:33 +08:00
+								  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
-												AMDGPU: Don't use stack space for SGPR->VGPR spills

Before frame offsets are calculated, try to eliminate the
frame indexes used by SGPR spills. Then we can delete them
after.

I think for now we can be sure that no other instruction
will be re-using the same frame indexes. It should be easy
to notice if this assumption ever breaks since everything
asserts if it tries to use a dead frame index later.

The unused emergency stack slot seems to still be left behind,
so an additional 4 bytes is still wasted.

llvm-svn: 295753

											
										
										
											2017-02-22 03:12:08 +08:00
+								  // Skip the last N reserved elements because they should have already been
 								  // reserved for VCC etc.
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  for (MCPhysReg Reg : AllSGPR128s) {
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								    // Pick the first unallocated one. Make sure we don't clobber the other
 								    // reserved input we needed.
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								      MRI.replaceRegWith(ScratchRsrcReg, Reg);
 								      MFI->setScratchRSrcReg(Reg);
 								      return Reg;
 								    }
 								  }
 								  return ScratchRsrcReg;
 								}
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								// Shift down registers reserved for the scratch wave offset and stack pointer
 								// SGPRs.
 								std::pair<unsigned, unsigned>
 								SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  const SISubtarget &ST,
 								  const SIInstrInfo *TII,
 								  const SIRegisterInfo *TRI,
 								  SIMachineFunctionInfo *MFI,
 								  MachineFunction &MF) const {
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
+								  MachineRegisterInfo &MRI = MF.getRegInfo();
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
 								  // No replacement necessary.
 								  if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								      !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
 								    assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister);
 								    return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
 								  }
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								  unsigned SPReg = MFI->getStackPtrOffsetReg();
 								  if (ST.hasSGPRInitBug())
 								    return std::make_pair(ScratchWaveOffsetReg, SPReg);
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
 								  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
-												[AMDGPU] Move register related queries to subtarget class

Differential Revision: https://reviews.llvm.org/D29318

llvm-svn: 294440

											
										
										
											2017-02-08 21:02:33 +08:00
+								  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  if (NumPreloaded > AllSGPRs.size())
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								    return std::make_pair(ScratchWaveOffsetReg, SPReg);
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
 								  AllSGPRs = AllSGPRs.slice(NumPreloaded);
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  // We need to drop register from the end of the list that we cannot use
 								  // for the scratch wave offset.
 								  // + 2 s102 and s103 do not exist on VI.
 								  // + 2 for vcc
 								  // + 2 for xnack_mask
 								  // + 2 for flat_scratch
 								  // + 4 for registers reserved for scratch resource register
 								  // + 1 for register reserved for scratch wave offset.  (By exluding this
 								  //     register from the list to consider, it means that when this
 								  //     register is being used for the scratch wave offset and there
 								  //     are no other free SGPRs, then the value will stay in this register.
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								  // + 1 if stack pointer is used.
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  // ----
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								  //  13 (+1)
 								  unsigned ReservedRegCount = 13;
 								  if (AllSGPRs.size() < ReservedRegCount)
 								    return std::make_pair(ScratchWaveOffsetReg, SPReg);
 								  bool HandledScratchWaveOffsetReg =
 								    ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								  for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								    // Pick the first unallocated SGPR. Be careful not to pick an alias of the
 								    // scratch descriptor, since we haven’t added its uses yet.
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
+								    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								      if (!HandledScratchWaveOffsetReg) {
 								        HandledScratchWaveOffsetReg = true;
 								        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
 								        MFI->setScratchWaveOffsetReg(Reg);
 								        ScratchWaveOffsetReg = Reg;
 								        break;
 								      }
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								    }
 								  }
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								  return std::make_pair(ScratchWaveOffsetReg, SPReg);
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								}
-												AMDGPU: Start defining a calling convention

Partially implement callee-side for arguments and return values.
byval doesn't work properly, and most likely sret or other on-stack
return values do not either.

llvm-svn: 303308

											
										
										
											2017-05-18 05:56:25 +08:00
+								void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
 								                                                MachineBasicBlock &MBB) const {
-												[AMDGPU] Emit debugger prologue and emit the rest of the debugger fields in the kernel code header

Debugger prologue is emitted if -mattr=+amdgpu-debugger-emit-prologue.

Debugger prologue writes work group IDs and work item IDs to scratch memory at fixed location in the following format:
  - offset 0: work group ID x
  - offset 4: work group ID y
  - offset 8: work group ID z
  - offset 16: work item ID x
  - offset 20: work item ID y
  - offset 24: work item ID z

Set
  - amd_kernel_code_t::debug_wavefront_private_segment_offset_sgpr to scratch wave offset reg
  - amd_kernel_code_t::debug_private_segment_buffer_sgpr to scratch rsrc reg
  - amd_kernel_code_t::is_debug_supported to true if all debugger features are enabled

Differential Revision: http://reviews.llvm.org/D20335

llvm-svn: 273769

											
										
										
											2016-06-25 11:11:28 +08:00
+								  // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
 								  // specified.
 								  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
-												[AMDGPU] Get address space mapping by target triple environment

As we introduced target triple environment amdgiz and amdgizcl, the address
space values are no longer enums. We have to decide the value by target triple.

The basic idea is to use struct AMDGPUAS to represent address space values.
For address space values which do not depend on the target triple, use static
const members, so that they don't occupy extra memory space and are equivalent
to compile-time constants.

Since the struct is lightweight and cheap, it can be created on the fly at
the point of usage. Or it can be added as member to a pass and created at
the beginning of the run* function.

Differential Revision: https://reviews.llvm.org/D31284

llvm-svn: 298846

											
										
										
											2017-03-27 22:04:01 +08:00
+								  auto AMDGPUASI = ST.getAMDGPUAS();
-												[AMDGPU] Emit debugger prologue and emit the rest of the debugger fields in the kernel code header

Debugger prologue is emitted if -mattr=+amdgpu-debugger-emit-prologue.

Debugger prologue writes work group IDs and work item IDs to scratch memory at fixed location in the following format:
  - offset 0: work group ID x
  - offset 4: work group ID y
  - offset 8: work group ID z
  - offset 16: work item ID x
  - offset 20: work item ID y
  - offset 24: work item ID z

Set
  - amd_kernel_code_t::debug_wavefront_private_segment_offset_sgpr to scratch wave offset reg
  - amd_kernel_code_t::debug_private_segment_buffer_sgpr to scratch rsrc reg
  - amd_kernel_code_t::is_debug_supported to true if all debugger features are enabled

Differential Revision: http://reviews.llvm.org/D20335

llvm-svn: 273769

											
										
										
											2016-06-25 11:11:28 +08:00
+								  if (ST.debuggerEmitPrologue())
 								    emitDebuggerPrologue(MF, MBB);
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
+								  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
 								  // If we only have SGPR spills, we won't actually be using scratch memory
 								  // since these spill to VGPRs.
 								  //
 								  // FIXME: We should be cleaning up these unused SGPR spill frame indices
 								  // somewhere.
-												AMDGPU: Cleanup subtarget handling.

Split AMDGPUSubtarget into amdgcn/r600 specific subclasses.
This removes most of the static_casting of the basic codegen
classes everywhere, and tries to restrict the features
visible on the wrong target.

llvm-svn: 273652

											
										
										
											2016-06-24 14:30:11 +08:00
+								  const SIInstrInfo *TII = ST.getInstrInfo();
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
+								  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
-												AMDGPU: Set flat_scratch from flat_scratch_init reg

This was hardcoded to the static private size, but this
would be missing the offset and additional size for someday
when we have dynamic sizing.

Also stops always initializing flat_scratch even when unused.

In the future we should stop emitting this unless flat instructions
are used to access private memory. For example this will initialize
it almost always on VI because flat is used for global access.

llvm-svn: 260658

											
										
										
											2016-02-12 14:31:30 +08:00
+								  MachineRegisterInfo &MRI = MF.getRegInfo();
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  // We need to do the replacement of the private segment buffer and wave offset
 								  // register even if there are no stack objects. There could be stores to undef
 								  // or a constant without an associated object.
 								  // FIXME: We still have implicit uses on SGPR spill instructions in case they
 								  // need to spill to vector memory. It's likely that will not happen, but at
 								  // this point it appears we need the setup. This part of the prolog should be
 								  // emitted after frame indices are eliminated.
 								  if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
-												AMDGPU: Merge initial gfx9 support

llvm-svn: 295554

											
										
										
											2017-02-19 02:29:53 +08:00
+								    emitFlatScratchInit(ST, MF, MBB);
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
+								  unsigned SPReg = MFI->getStackPtrOffsetReg();
 								  if (SPReg != AMDGPU::NoRegister) {
 								    DebugLoc DL;
 								    int64_t StackSize = MF.getFrameInfo().getStackSize();
 								    if (StackSize == 0) {
 								      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
 								        .addReg(MFI->getScratchWaveOffsetReg());
 								    } else {
 								      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
 								        .addReg(MFI->getScratchWaveOffsetReg())
 								        .addImm(StackSize * ST.getWavefrontSize());
 								    }
 								  }
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
+								  unsigned ScratchRsrcReg
 								    = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
-												AMDGPU: Shift down reserved SP register like scratch wave offset

llvm-svn: 301367

											
										
										
											2017-04-26 07:40:57 +08:00
 								  unsigned ScratchWaveOffsetReg;
 								  std::tie(ScratchWaveOffsetReg, SPReg)
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
+								    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
 								  // It's possible to have uses of only ScratchWaveOffsetReg without
 								  // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
 								  // but the inverse is not true.
 								  if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
 								    assert(ScratchRsrcReg == AMDGPU::NoRegister);
 								    return;
 								  }
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  // We need to insert initialization of the scratch resource descriptor.
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								  unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
 								    MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
 								  unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
-												AMDGPU add support for spilling to a user sgpr pointed buffers

Summary:
This lets you select which sort of spilling you want, either s[0:1] or 64-bit loads from s[0:1].

Patch By: Dave Airlie

Reviewers: nhaehnle, arsenm, tstellarAMD

Reviewed By: arsenm

Subscribers: mareko, llvm-commits, kzhuravl, wdng, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D25428

llvm-svn: 293000

											
										
										
											2017-01-25 09:25:13 +08:00
+								  if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								    PreloadedPrivateBufferReg = TRI->getPreloadedValue(
 								      MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
 								  }
-												AMDGPU: Slightly simplify prolog reserved register handling

Rely on MachineRegisterInfo's knowledge of used physical
registers.

Move flat_scratch initialization earlier, so the uses are visible
when making these decisions.

This will make it easier to add another reserved register
at the end for the stack pointer rather than handling another
special case.

llvm-svn: 301254

											
										
										
											2017-04-25 05:08:32 +08:00
+								  bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
 								  bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
 								                         MRI.isPhysRegUsed(ScratchRsrcReg);
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
 								  // We added live-ins during argument lowering, but since they were not used
 								  // they were deleted. We're adding the uses now, so add them back.
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  if (OffsetRegUsed) {
 								    assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
 								           "scratch wave offset input is required");
 								    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
 								    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
 								  }
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
-												AMDGPU add support for spilling to a user sgpr pointed buffers

Summary:
This lets you select which sort of spilling you want, either s[0:1] or 64-bit loads from s[0:1].

Patch By: Dave Airlie

Reviewers: nhaehnle, arsenm, tstellarAMD

Reviewed By: arsenm

Subscribers: mareko, llvm-commits, kzhuravl, wdng, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D25428

llvm-svn: 293000

											
										
										
											2017-01-25 09:25:13 +08:00
+								    assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								    MRI.addLiveIn(PreloadedPrivateBufferReg);
 								    MBB.addLiveIn(PreloadedPrivateBufferReg);
 								  }
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  // Make the register selected live throughout the function.
 								  for (MachineBasicBlock &OtherBB : MF) {
 								    if (&OtherBB == &MBB)
 								      continue;
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								    if (OffsetRegUsed)
 								      OtherBB.addLiveIn(ScratchWaveOffsetReg);
 								    if (ResourceRegUsed)
 								      OtherBB.addLiveIn(ScratchRsrcReg);
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								  }
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
+								  DebugLoc DL;
-												AMDGPU: Refactor frame lowering

This will make future changes easier.

llvm-svn: 280296

											
										
										
											2016-09-01 05:52:21 +08:00
+								  MachineBasicBlock::iterator I = MBB.begin();
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  // If we reserved the original input registers, we don't need to copy to the
 								  // reserved registers.
 								  bool CopyBuffer = ResourceRegUsed &&
 								    PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
-												AMDGPU add support for spilling to a user sgpr pointed buffers

Summary:
This lets you select which sort of spilling you want, either s[0:1] or 64-bit loads from s[0:1].

Patch By: Dave Airlie

Reviewers: nhaehnle, arsenm, tstellarAMD

Reviewed By: arsenm

Subscribers: mareko, llvm-commits, kzhuravl, wdng, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D25428

llvm-svn: 293000

											
										
										
											2017-01-25 09:25:13 +08:00
+								    ST.isAmdCodeObjectV2(MF) &&
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								    ScratchRsrcReg != PreloadedPrivateBufferReg;
 								  // This needs to be careful of the copying order to avoid overwriting one of
 								  // the input registers before it's been copied to its final
 								  // destination. Usually the offset should be copied first.
 								  bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
 								                                              ScratchWaveOffsetReg);
 								  if (CopyBuffer && CopyBufferFirst) {
 								    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
 								      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
 								  }
 								  if (OffsetRegUsed &&
 								      PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
-												AMDGPU: Use copy instead of mov during frame lowering

This occurs before RA pseudos are expanded. It's less
code to emit the copy.

llvm-svn: 280297

											
										
										
											2016-09-01 05:52:25 +08:00
+								    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
-												AMDGPU: GFX9 GS and HS shaders always have the scratch wave offset in SGPR5

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D32645

llvm-svn: 302200

											
										
										
											2017-05-05 06:25:20 +08:00
+								      .addReg(PreloadedScratchWaveOffsetReg,
 								              MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								  }
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  if (CopyBuffer && !CopyBufferFirst) {
-												AMDGPU: Use copy instead of mov during frame lowering

This occurs before RA pseudos are expanded. It's less
code to emit the copy.

llvm-svn: 280297

											
										
										
											2016-09-01 05:52:25 +08:00
+								    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
 								      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
-												AMDGPU: Fix using incorrect private resource with no allocation

It's possible to have a use of the private resource descriptor or
scratch wave offset registers even though there are no allocated
stack objects. This would result in continuing to use the maximum
number of reserved registers. This could go over the number of SGPRs
available on VI, or violate the SGPR limit requested by
the function attributes.

llvm-svn: 285435

											
										
										
											2016-10-29 03:43:31 +08:00
+								  }
-												AMDGPU add support for spilling to a user sgpr pointed buffers

Summary:
This lets you select which sort of spilling you want, either s[0:1] or 64-bit loads from s[0:1].

Patch By: Dave Airlie

Reviewers: nhaehnle, arsenm, tstellarAMD

Reviewed By: arsenm

Subscribers: mareko, llvm-commits, kzhuravl, wdng, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D25428

llvm-svn: 293000

											
										
										
											2017-01-25 09:25:13 +08:00
+								  if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
 								    assert(!ST.isAmdCodeObjectV2(MF));
-												AMDGPU: Use copy instead of mov during frame lowering

This occurs before RA pseudos are expanded. It's less
code to emit the copy.

llvm-svn: 280297

											
										
										
											2016-09-01 05:52:25 +08:00
+								    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
+								    unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
 								    unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
 								    // Use relocations to get the pointer, and setup the other bits manually.
 								    uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-												AMDGPU add support for spilling to a user sgpr pointed buffers

Summary:
This lets you select which sort of spilling you want, either s[0:1] or 64-bit loads from s[0:1].

Patch By: Dave Airlie

Reviewers: nhaehnle, arsenm, tstellarAMD

Reviewed By: arsenm

Subscribers: mareko, llvm-commits, kzhuravl, wdng, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D25428

llvm-svn: 293000

											
										
										
											2017-01-25 09:25:13 +08:00
+								    if (MFI->hasPrivateMemoryInputPtr()) {
 								      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
 								      if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
 								        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
 								        BuildMI(MBB, I, DL, Mov64, Rsrc01)
 								          .addReg(PreloadedPrivateBufferReg)
 								          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 								      } else {
 								        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
 								        PointerType *PtrTy =
 								          PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
-												[AMDGPU] Get address space mapping by target triple environment

As we introduced target triple environment amdgiz and amdgizcl, the address
space values are no longer enums. We have to decide the value by target triple.

The basic idea is to use struct AMDGPUAS to represent address space values.
For address space values which do not depend on the target triple, use static
const members, so that they don't occupy extra memory space and are equivalent
to a compile time constant.

Since the struct is lightweight and cheap, it can be created on the fly at
the point of usage. Or it can be added as member to a pass and created at
the beginning of the run* function.

Differential Revision: https://reviews.llvm.org/D31284

llvm-svn: 298846

											
										
										
											2017-03-27 22:04:01 +08:00
+								                           AMDGPUASI.CONSTANT_ADDRESS);
-												AMDGPU add support for spilling to a user sgpr pointed buffers

Summary:
This lets you select which sort of spilling you want, either s[0:1] or 64-bit loads from s[0:1].

Patch By: Dave Airlie

Reviewers: nhaehnle, arsenm, tstellarAMD

Reviewed By: arsenm

Subscribers: mareko, llvm-commits, kzhuravl, wdng, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D25428

llvm-svn: 293000

											
										
										
											2017-01-25 09:25:13 +08:00
+								        MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
 								        auto MMO = MF.getMachineMemOperand(PtrInfo,
 								                                           MachineMemOperand::MOLoad |
 								                                           MachineMemOperand::MOInvariant |
 								                                           MachineMemOperand::MODereferenceable,
 , 0);
 								        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
 								          .addReg(PreloadedPrivateBufferReg)
 								          .addImm(0) // offset
 								          .addImm(0) // glc
 								          .addMemOperand(MMO)
 								          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 								      }
 								    } else {
 								      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
 								      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
 								      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
 								        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
 								        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 								      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
 								        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
 								        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 								    }
-												AMDGPU: Rework how private buffer passed for HSA

If we know we have stack objects, we reserve the registers
that the private buffer resource and wave offset are passed
and use them directly.

If not, reserve the last 5 SGPRs just in case we need to spill.
After register allocation, try to pick the next available registers
instead of the last SGPRs, and then insert copies from the inputs
to the reserved registers in the prologue.

This also only selectively enables all of the input registers
which are really required instead of always enabling them.

llvm-svn: 254331

											
										
										
											2015-12-01 05:16:03 +08:00
 								    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
 								      .addImm(Rsrc23 & 0xffffffff)
 								      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 								    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
 								      .addImm(Rsrc23 >> 32)
 								      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 								  }
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
+								}
-												AMDGPU: Start defining a calling convention

Partially implement callee-side for arguments and return values.
byval doesn't work properly, and most likely neither do sret or other
on-stack return values.

llvm-svn: 303308

											
										
										
											2017-05-18 05:56:25 +08:00
+								void SIFrameLowering::emitPrologue(MachineFunction &MF,
 								                                   MachineBasicBlock &MBB) const {
 								  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 								  if (MFI->isEntryFunction())
 								    emitEntryFunctionPrologue(MF, MBB);
 								}
-												AMDGPU: Cleanup subtarget handling.

Split AMDGPUSubtarget into amdgcn/r600 specific subclasses.
This removes most of the static_casting of the basic codegen
classes everywhere, and tries to restrict the features
visible on the wrong target.

llvm-svn: 273652

											
										
										
											2016-06-24 14:30:11 +08:00
 								/// Epilogue hook for \p MF / \p MBB.
 								///
 								/// The body is empty, so no instructions are added to \p MBB.
+								void SIFrameLowering::emitEpilogue(MachineFunction &MF,
 								                                   MachineBasicBlock &MBB) const {
 								}
-												AMDGPU: Don't add emergency stack slot if all spills are SGPR->VGPR

This should avoid reporting any stack needs to be allocated in the
case where no stack is truly used. An unused stack slot is still
left around in other cases where there are real stack objects
but no spilling occurs.

llvm-svn: 295891

											
										
										
											2017-02-23 06:23:32 +08:00
+								static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
 								  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
 								       I != E; ++I) {
 								    if (!MFI.isDeadObjectIndex(I))
 								      return false;
 								  }
 								  return true;
 								}
-												[AMDGPU] Split R600/SI getFrameIndexReference and emit stack object offsets for SI

Differential Revision: https://reviews.llvm.org/D29674

llvm-svn: 297499

											
										
										
											2017-03-11 03:39:07 +08:00
+								int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
 								                                            unsigned &FrameReg) const {
 								  const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
 								  FrameReg = RI->getFrameRegister(MF);
 								  return MF.getFrameInfo().getObjectOffset(FI);
 								}
-												AMDGPU: Create emergency stack slots during frame lowering

Test has a bogus verifier error which will be fixed by later commits.

llvm-svn: 252327

											
										
										
											2015-11-07 02:17:45 +08:00
+								void SIFrameLowering::processFunctionBeforeFrameFinalized(
 								  MachineFunction &MF,
 								  RegScavenger *RS) const {
-												MachineFunction: Return reference for getFrameInfo(); NFC

getFrameInfo() never returns nullptr so we should use a reference
instead of a pointer.

llvm-svn: 277017

											
										
										
											2016-07-29 02:40:00 +08:00
+								  MachineFrameInfo &MFI = MF.getFrameInfo();
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
-												MachineFunction: Return reference for getFrameInfo(); NFC

getFrameInfo() never returns nullptr so we should use a reference
instead of a pointer.

llvm-svn: 277017

											
										
										
											2016-07-29 02:40:00 +08:00
+								  if (!MFI.hasStackObjects())
-												AMDGPU: Remove SIPrepareScratchRegs

It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.

The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.

Also removes creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.

llvm-svn: 254329

											
										
										
											2015-12-01 05:15:53 +08:00
+								    return;
-												AMDGPU: Don't add emergency stack slot if all spills are SGPR->VGPR

This should avoid reporting any stack needs to be allocated in the
case where no stack is truly used. An unused stack slot is still
left around in other cases where there are real stack objects
but no spilling occurs.

llvm-svn: 295891

											
										
										
											2017-02-23 06:23:32 +08:00
+								  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 								  const SIInstrInfo *TII = ST.getInstrInfo();
 								  const SIRegisterInfo &TRI = TII->getRegisterInfo();
 								  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 								  bool AllSGPRSpilledToVGPRs = false;
 								  if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
 								    AllSGPRSpilledToVGPRs = true;
 								    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
 								    // are spilled to VGPRs, in which case we can eliminate the stack usage.
 								    //
 								    // XXX - This operates under the assumption that only other SGPR spills are
 								    // users of the frame index. I'm not 100% sure this is correct. The
 								    // StackColoring pass has a comment saying a future improvement would be to
 								    // merge allocas with spill slots, but for now according to
 								    // MachineFrameInfo isSpillSlot can't alias any other object.
 								    for (MachineBasicBlock &MBB : MF) {
 								      MachineBasicBlock::iterator Next;
 								      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
 								        MachineInstr &MI = *I;
 								        Next = std::next(I);
 								        if (TII->isSGPRSpill(MI)) {
 								          int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
 								          if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
 								            bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
 								            (void)Spilled;
 								            assert(Spilled && "failed to spill SGPR to VGPR when allocated");
 								          } else
 								            AllSGPRSpilledToVGPRs = false;
 								        }
 								      }
 								    }
 								    FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
 								  }
 								  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
 								  // but currently hasNonSpillStackObjects is set only from source
 								  // allocas. Stack temps produced from legalization are not counted currently.
 								  if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
 								      !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
 								    assert(RS && "RegScavenger required if spilling");
-												AMDGPU: Always allocate emergency stack slot at offset 0

This allows us to ensure that 0 is never a valid pointer
to a user object, and ensures that the offset is always legal
without needing a register to access it. This comes at the cost
of usable offsets and wasted stack space.

llvm-svn: 295877

											
										
										
											2017-02-23 05:05:25 +08:00
+								    // We force this to be at offset 0 so no user object ever has 0 as an
 								    // address, so we may use 0 as an invalid pointer value. This is because
 								    // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
 								    // is required to be address space 0, we are forced to accept this for
 								    // now. Ideally we could have the stack in another address space with 0 as a
 								    // valid pointer, and -1 as the null value.
 								    //
 								    // This will also waste additional space when user stack objects require > 4
 								    // byte alignment.
 								    //
 								    // The main cost here is losing the offset for addressing modes. However
 								    // this also ensures we shouldn't need a register for the offset when
 								    // emergency scavenging.
 								    int ScavengeFI = MFI.CreateFixedObject(
-												Move size and alignment information of regclass to TargetRegisterInfo

1. RegisterClass::getSize() is split into two functions:
   - TargetRegisterInfo::getRegSizeInBits(const TargetRegisterClass &RC) const;
   - TargetRegisterInfo::getSpillSize(const TargetRegisterClass &RC) const;
2. RegisterClass::getAlignment() is replaced by:
   - TargetRegisterInfo::getSpillAlignment(const TargetRegisterClass &RC) const;

This will allow making those values depend on subtarget features in the
future.

Differential Revision: https://reviews.llvm.org/D31783

llvm-svn: 301221

											
										
										
											2017-04-25 02:55:33 +08:00
+								      TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
-												AMDGPU: Always allocate emergency stack slot at offset 0

This allows us to ensure that 0 is never a valid pointer
to a user object, and ensures that the offset is always legal
without needing a register to access it. This comes at the cost
of usable offsets and wasted stack space.

llvm-svn: 295877

											
										
										
											2017-02-23 05:05:25 +08:00
+								    RS->addScavengingFrameIndex(ScavengeFI);
 								  }
-												AMDGPU: Create emergency stack slots during frame lowering

Test has a bogus verifier error which will be fixed by later commits.

llvm-svn: 252327

											
										
										
											2015-11-07 02:17:45 +08:00
+								}
-												[AMDGPU] Emit debugger prologue and emit the rest of the debugger fields in the kernel code header

Debugger prologue is emitted if -mattr=+amdgpu-debugger-emit-prologue.

Debugger prologue writes work group IDs and work item IDs to scratch memory at fixed location in the following format:
  - offset 0: work group ID x
  - offset 4: work group ID y
  - offset 8: work group ID z
  - offset 16: work item ID x
  - offset 20: work item ID y
  - offset 24: work item ID z

Set
  - amd_kernel_code_t::debug_wavefront_private_segment_offset_sgpr to scratch wave offset reg
  - amd_kernel_code_t::debug_private_segment_buffer_sgpr to scratch rsrc reg
  - amd_kernel_code_t::is_debug_supported to true if all debugger features are enabled

Differential Revision: http://reviews.llvm.org/D20335

llvm-svn: 273769

											
										
										
											2016-06-25 11:11:28 +08:00
 								/// Emit the debugger prologue at the beginning of \p MBB.
 								///
 								/// For each of the three dimensions, the work group ID (held in an SGPR) and
 								/// the work item ID (held in a VGPR) are stored to the stack objects that
 								/// SIMachineFunctionInfo reports for the debugger. All instructions are
 								/// inserted before the instruction that was first in \p MBB on entry, and
 								/// use a default-constructed DebugLoc.
 								///
 								/// \param MF  Function being lowered; its register info receives the live-ins
 								///            and the temporary virtual VGPRs.
 								/// \param MBB Block that receives the prologue instructions and live-ins.
 								void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
 								                                           MachineBasicBlock &MBB) const {
 								  const SIInstrInfo *TII = MF.getSubtarget<SISubtarget>().getInstrInfo();
 								  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
 								  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 								  MachineRegisterInfo &MRI = MF.getRegInfo();

 								  // Everything is inserted in front of the block's original first instruction.
 								  MachineBasicBlock::iterator InsertPt = MBB.begin();
 								  DebugLoc UnknownDL;

 								  // Handle the x, y and z dimensions in turn.
 								  for (unsigned Dim = 0; Dim != 3; ++Dim) {
 								    // The work group ID arrives in an SGPR; mark it live-in again for both
 								    // the function and this block.
 								    const unsigned GroupIDSGPR = FuncInfo->getWorkGroupIDSGPR(Dim);
 								    MRI.addLiveIn(GroupIDSGPR);
 								    MBB.addLiveIn(GroupIDSGPR);

 								    // SGPR spills go to VGPRs, so move the ID into a fresh VGPR first; that
 								    // VGPR can then be stored to scratch.
 								    const unsigned GroupIDVGPR =
 								      MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 								    BuildMI(MBB, InsertPt, UnknownDL, TII->get(AMDGPU::V_MOV_B32_e32),
 								            GroupIDVGPR)
 								      .addReg(GroupIDSGPR);

 								    // Store the copied work group ID into its debugger stack object.
 								    const int GroupIDSlot = FuncInfo->getDebuggerWorkGroupIDStackObjectIndex(Dim);
 								    TII->storeRegToStackSlot(MBB, InsertPt, GroupIDVGPR, false,
 								      GroupIDSlot, &AMDGPU::VGPR_32RegClass, TRI);

 								    // The work item ID is already in a VGPR; mark it live-in again.
 								    const unsigned ItemIDVGPR = FuncInfo->getWorkItemIDVGPR(Dim);
 								    MRI.addLiveIn(ItemIDVGPR);
 								    MBB.addLiveIn(ItemIDVGPR);

 								    // Store the work item ID into its debugger stack object.
 								    const int ItemIDSlot = FuncInfo->getDebuggerWorkItemIDStackObjectIndex(Dim);
 								    TII->storeRegToStackSlot(MBB, InsertPt, ItemIDVGPR, false,
 								      ItemIDSlot, &AMDGPU::VGPR_32RegClass, TRI);
 								  }
 								}