//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // If we require an unused register, we are in a context where failure is
  // an option and the caller has an alternative plan. In other contexts, this
  // must succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return MCRegister();
}

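
// Find a location in which the FP/BP value can be preserved across the
// prologue/epilogue: prefer a free lane in an already-spilled VGPR, then an
// unused non-callee-save SGPR (returned in TempSGPR), then a lane of a newly
// spilled VGPR, and finally a plain memory spill slot (returned in FrameIndex).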
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

#ifndef NDEBUG
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
#endif

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                      << '\n');
    return;
  }

  // 2: Next, try to save the FP/BP in an unused SGPR.
  TempSGPR = findScratchNonCalleeSaveRegister(
      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);

  if (!TempSGPR) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
      // 3: There's no free lane to spill, and no free register to save FP/BP,
      // so we're forced to spill another VGPR to use for the spill.
      FrameIndex = NewFI;

      LLVM_DEBUG(
          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
    } else {
      // Remove dead <NewFI> index
      MF.getFrameInfo().RemoveStackObject(NewFI);
      // 4: If all else fails, spill the FP/BP to memory.
      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
                        << (IsFP ? "FP" : "BP") << '\n');
    }
  } else {
    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                      << printReg(TempSGPR, TRI) << '\n');
  }
}

// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function, as getFrameRegister would
// use.
static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, Register SpillReg,
                             Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlign(FI));

  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
          .addReg(SpillReg, RegState::Kill)
          .addReg(SPReg)
          .addImm(Offset)
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0) // dlc
          .addMemOperand(MMO);
      return;
    }
  } else if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
        .addReg(SpillReg, RegState::Kill)
        .addReg(ScratchRsrcReg)
        .addReg(SPReg)
        .addImm(Offset)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addImm(0) // dlc
        .addImm(0) // swz
        .addMemOperand(MMO);
    return;
  }

  // Don't clobber the TmpVGPR if we also need a scratch reg for the stack
  // offset in the spill.
  LiveRegs.addReg(SpillReg);

  if (ST.enableFlatScratch()) {
    MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
        MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg)
        .addReg(SPReg)
        .addImm(Offset);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
        .addReg(SpillReg, RegState::Kill)
        .addReg(OffsetReg, RegState::Kill)
        .addImm(0)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // dlc
        .addMemOperand(MMO);
  } else {
    MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
        MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
        .addImm(Offset);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
        .addReg(SpillReg, RegState::Kill)
        .addReg(OffsetReg, RegState::Kill)
        .addReg(ScratchRsrcReg)
        .addReg(SPReg)
        .addImm(0)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addImm(0) // dlc
        .addImm(0) // swz
        .addMemOperand(MMO);
  }

  LiveRegs.removeReg(SpillReg);
}
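
// Reload SpillReg from its scratch stack slot in the epilogue. Mirrors
// buildPrologSpill: uses flat scratch or MUBUF accesses depending on the
// subtarget, and falls back to a scavenged offset register when the immediate
// offset is not encodable.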
static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const SIInstrInfo *TII, Register SpillReg,
                              Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
      MFI.getObjectAlign(FI));

  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
      BuildMI(MBB, I, DebugLoc(),
              TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg)
          .addReg(SPReg)
          .addImm(Offset)
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0) // dlc
          .addMemOperand(MMO);
      return;
    }
    MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
        MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass);

    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg)
        .addReg(SPReg)
        .addImm(Offset);
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR),
            SpillReg)
        .addReg(OffsetReg, RegState::Kill)
        .addImm(0)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // dlc
        .addMemOperand(MMO);
    return;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
    BuildMI(MBB, I, DebugLoc(),
            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
        .addReg(ScratchRsrcReg)
        .addReg(SPReg)
        .addImm(Offset)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addImm(0) // dlc
        .addImm(0) // swz
        .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
      MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
      .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(),
          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
      .addReg(OffsetReg, RegState::Kill)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(0)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .addImm(0) // swz
      .addMemOperand(MMO);
}
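
// Materialize the 64-bit pointer to the PAL global information table (GIT) in
// TargetReg. The high half comes from the amdgpu-git-ptr-high function
// attribute when set, otherwise from the current PC via S_GETPC_B64; the low
// half is copied from the preloaded GIT pointer SGPR.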
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
      .addReg(GitPtrLo);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LivePhysRegs LiveRegs;
    LiveRegs.init(*TRI);
    LiveRegs.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0) // glc
        .addImm(0) // dlc
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
          .addReg(FlatScrInitHi)
          .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
          addReg(FlatScrInitLo).
          addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                         (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
          addReg(FlatScrInitHi).
          addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                         (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
        .addReg(FlatScrInitHi)
        .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitLo, RegState::Kill)
      .addImm(8);
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers; only the
  // resources required for scratch access cannot be eliminated. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}
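
// Scale factor applied to frame sizes when materializing the stack pointer:
// 1 when flat scratch is in use (per-lane byte offsets), the wavefront size
// for MUBUF scratch.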
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}
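
// Set up the stack pointer, frame pointer, scratch RSRC and flat scratch
// registers at the start of an entry function (kernel or shader entry point).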
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations which emitted an error.
  if (!PreloadedScratchWaveOffsetReg)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found overlaps the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(MF.getFrameInfo().getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  if (MFI->hasFlatScratchInit() || ScratchRsrcReg) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (MFI->hasFlatScratchInit()) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
|
2020-02-25 07:50:21 +08:00
|
|
|
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
|
|
|
|
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|
|
|
const DebugLoc &DL, Register PreloadedScratchRsrcReg,
|
2020-01-22 06:27:57 +08:00
|
|
|
Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
|
2017-09-29 17:49:35 +08:00
|
|
|
|
2020-02-25 07:50:21 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
2017-09-29 17:49:35 +08:00
|
|
|
const SIInstrInfo *TII = ST.getInstrInfo();
|
|
|
|
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
|
2020-02-25 07:50:21 +08:00
|
|
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
2018-05-30 01:42:50 +08:00
|
|
|
const Function &Fn = MF.getFunction();
|
2017-09-29 17:49:35 +08:00
|
|
|
|
|
|
|
if (ST.isAmdPalOS()) {
|
|
|
|
// The pointer to the GIT is formed from the offset passed in and either
|
|
|
|
// the amdgpu-git-ptr-high function attribute or the top part of the PC
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
|
2017-09-29 17:49:35 +08:00
|
|
|
|
2020-10-15 19:26:44 +08:00
|
|
|
buildGitPtr(MBB, I, DL, TII, Rsrc01);
|
2017-09-29 17:49:35 +08:00
|
|
|
|
|
|
|
// We now have the GIT ptr - now get the scratch descriptor from the entry
|
[AMDGPU] For OS type AMDPAL, fixed scratch on compute shader
Summary:
For OS type AMDPAL, the scratch descriptor is loaded from offset 0 of
the GIT, whose 32 bit pointer is in s0 (s8 for gfx9 merged shaders).
This commit fixes that to use offset 0x10 instead of offset 0 for a
compute shader, per the PAL ABI spec.
V2: Ensure s0 (s8 for gfx9 merged shader) is marked live-in when loading
scratch descriptor from GIT.
Reviewers: kzhuravl, nhaehnle, timcorringham
Subscribers: kzhuravl, wdng, yaxunl, t-tye, llvm-commits, dstuttard, nhaehnle, arsenm
Differential Revision: https://reviews.llvm.org/D44468
Change-Id: I93dffa647758e37f613bb5e0dfca840d82e6d26f
llvm-svn: 329690
2018-04-10 19:25:15 +08:00
|
|
|
// at offset 0 (or offset 16 for a compute shader).
|
[AMDGPU] Don't create MachinePointerInfos with an UndefValue pointer
Summary:
The only useful information the UndefValue conveys is the address space,
which MachinePointerInfo can represent directly without referring to an
IR value.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71838
2019-12-23 21:42:12 +08:00
|
|
|
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
|
2017-09-29 17:49:35 +08:00
|
|
|
const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
|
|
|
|
auto MMO = MF.getMachineMemOperand(PtrInfo,
|
|
|
|
MachineMemOperand::MOLoad |
|
[Alignment][NFC] Transitionning more getMachineMemOperand call sites
Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790
Reviewers: courbet
Subscribers: arsenm, dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, jrtc27, atanasyan, Jim, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D77121
2020-03-31 16:05:00 +08:00
|
|
|
MachineMemOperand::MOInvariant |
|
|
|
|
MachineMemOperand::MODereferenceable,
|
|
|
|
16, Align(4));
|
2018-05-30 01:42:50 +08:00
|
|
|
unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
|
[AMDGPU] Fix CS scratch setup on pre-GCN3 ASICs
Summary:
Prior to GCN3 s_load_dword offsets are in dwords rather than bytes.
Thus the scratch buffer descriptor offset must be adjusted for pre-GCN3 ASICs.
Reviewers: nhaehnle, tpr
Reviewed By: nhaehnle
Subscribers: sheredom, arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D56496
llvm-svn: 353530
2019-02-08 23:41:11 +08:00
|
|
|
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
|
2020-01-31 07:41:10 +08:00
|
|
|
unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
|
2017-09-29 17:49:35 +08:00
|
|
|
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
|
|
|
|
.addReg(Rsrc01)
|
[AMDGPU] Fix CS scratch setup on pre-GCN3 ASICs
Summary:
Prior to GCN3 s_load_dword offsets are in dwords rather than bytes.
Thus the scratch buffer descriptor offset must be adjusted for pre-GCN3 ASICs.
Reviewers: nhaehnle, tpr
Reviewed By: nhaehnle
Subscribers: sheredom, arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D56496
llvm-svn: 353530
2019-02-08 23:41:11 +08:00
|
|
|
.addImm(EncodedOffset) // offset
|
2017-09-29 17:49:35 +08:00
|
|
|
.addImm(0) // glc
|
2019-05-01 06:08:23 +08:00
|
|
|
.addImm(0) // dlc
|
2017-09-29 17:49:35 +08:00
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
|
|
|
|
.addMemOperand(MMO);
|
2020-05-01 02:25:24 +08:00
|
|
|
} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
|
2018-10-05 05:02:16 +08:00
|
|
|
assert(!ST.isAmdHsaOrMesa(Fn));
|
2016-09-01 05:52:25 +08:00
|
|
|
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
|
|
|
|
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
|
|
|
|
Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
|
2015-12-01 05:16:03 +08:00
|
|
|
|
|
|
|
// Use relocations to get the pointer, and setup the other bits manually.
|
|
|
|
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
|
|
|
|
|
2017-06-26 11:01:31 +08:00
|
|
|
if (MFI->hasImplicitBufferPtr()) {
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
|
2017-01-25 09:25:13 +08:00
|
|
|
|
2017-12-16 06:22:58 +08:00
|
|
|
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
|
2017-01-25 09:25:13 +08:00
|
|
|
const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
|
|
|
|
|
|
|
|
BuildMI(MBB, I, DL, Mov64, Rsrc01)
|
2017-06-26 11:01:31 +08:00
|
|
|
.addReg(MFI->getImplicitBufferPtrUserSGPR())
|
2017-01-25 09:25:13 +08:00
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
|
|
|
} else {
|
|
|
|
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
|
|
|
|
|
[AMDGPU] Don't create MachinePointerInfos with an UndefValue pointer
Summary:
The only useful information the UndefValue conveys is the address space,
which MachinePointerInfo can represent directly without referring to an
IR value.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71838
2019-12-23 21:42:12 +08:00
|
|
|
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
|
[Alignment][NFC] Transitionning more getMachineMemOperand call sites
Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790
Reviewers: courbet
Subscribers: arsenm, dylanmckay, sdardis, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, jrtc27, atanasyan, Jim, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D77121
2020-03-31 16:05:00 +08:00
|
|
|
auto MMO = MF.getMachineMemOperand(
|
|
|
|
PtrInfo,
|
|
|
|
MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
|
|
|
|
MachineMemOperand::MODereferenceable,
|
|
|
|
8, Align(4));
|
2017-01-25 09:25:13 +08:00
|
|
|
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
|
2017-06-26 11:01:31 +08:00
|
|
|
.addReg(MFI->getImplicitBufferPtrUserSGPR())
|
2017-01-25 09:25:13 +08:00
|
|
|
.addImm(0) // offset
|
|
|
|
.addImm(0) // glc
|
2019-05-01 06:08:23 +08:00
|
|
|
.addImm(0) // dlc
|
2017-01-25 09:25:13 +08:00
|
|
|
.addMemOperand(MMO)
|
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
2019-06-01 06:47:36 +08:00
|
|
|
|
|
|
|
MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
|
|
|
|
MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
|
2017-01-25 09:25:13 +08:00
|
|
|
}
|
|
|
|
} else {
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
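To make the rewrite concrete, a hypothetical call site (not taken from this file) changes roughly like this:
// Before: the result of getReg() is implicitly narrowed to unsigned.
unsigned DstReg = MI.getOperand(0).getReg();
// After: keep the llvm::Register type end to end.
Register DstReg = MI.getOperand(0).getReg();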
|
|
|
Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
|
|
|
|
Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
|
2017-01-25 09:25:13 +08:00
|
|
|
|
|
|
|
BuildMI(MBB, I, DL, SMovB32, Rsrc0)
|
|
|
|
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
|
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
|
|
|
|
|
|
|
BuildMI(MBB, I, DL, SMovB32, Rsrc1)
|
|
|
|
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
|
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
|
|
|
|
|
|
|
}
|
2015-12-01 05:16:03 +08:00
|
|
|
|
|
|
|
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
|
|
|
|
.addImm(Rsrc23 & 0xffffffff)
|
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
|
|
|
|
|
|
|
BuildMI(MBB, I, DL, SMovB32, Rsrc3)
|
|
|
|
.addImm(Rsrc23 >> 32)
|
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
2020-01-22 06:27:57 +08:00
|
|
|
} else if (ST.isAmdHsaOrMesa(Fn)) {
|
2020-05-01 02:25:24 +08:00
|
|
|
assert(PreloadedScratchRsrcReg);
|
2020-01-22 06:27:57 +08:00
|
|
|
|
|
|
|
if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
|
|
|
|
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
|
|
|
|
.addReg(PreloadedScratchRsrcReg, RegState::Kill);
|
|
|
|
}
|
2015-12-01 05:16:03 +08:00
|
|
|
}
|
2020-01-22 06:27:57 +08:00
|
|
|
|
|
|
|
// Add the scratch wave offset into the scratch RSRC.
|
|
|
|
//
|
|
|
|
// We only want to update the first 48 bits, which is the base address
|
|
|
|
// pointer, without touching the adjacent 16 bits of flags. We know this add
|
|
|
|
// cannot carry-out from bit 47, otherwise the scratch allocation would be
|
|
|
|
// impossible to fit in the 48-bit global address space.
|
|
|
|
//
|
|
|
|
// TODO: Evaluate if it is better to just construct an SRD using the flat
|
|
|
|
// scratch init and some constants rather than update the one we are passed.
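// Illustrative arithmetic, with made-up numbers rather than values from any
// ABI document: if the base address in the SRD were 0x0000_7FFF_0000_0000 and
// the wave offset a few MiB, the sum still fits in 48 bits, so the S_ADDC_U32
// below only propagates a carry from the low dword into bits 32-47 and never
// into the flag bits above them.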
|
|
|
|
Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
|
|
|
|
Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
|
|
|
|
|
|
|
|
// We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
|
|
|
|
// the kernel body via inreg arguments.
|
|
|
|
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
|
|
|
|
.addReg(ScratchRsrcSub0)
|
|
|
|
.addReg(ScratchWaveOffsetReg)
|
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
|
|
|
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
|
|
|
|
.addReg(ScratchRsrcSub1)
|
|
|
|
.addImm(0)
|
|
|
|
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
2015-12-01 05:15:53 +08:00
|
|
|
}
|
|
|
|
|
2019-06-17 17:13:29 +08:00
|
|
|
bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
|
2019-06-17 18:20:20 +08:00
|
|
|
switch (ID) {
|
|
|
|
case TargetStackID::Default:
|
|
|
|
case TargetStackID::NoAlloc:
|
|
|
|
case TargetStackID::SGPRSpill:
|
|
|
|
return true;
|
[AArch64] Static (de)allocation of SVE stack objects.
Adds support to AArch64FrameLowering to allocate fixed-stack SVE objects.
The focus of this patch is purely to allow the stack frame to
allocate/deallocate space for scalable SVE objects. More dynamic
allocation (at compile-time, i.e. determining placement of SVE objects
on the stack), or resolving frame-index references that include
scalable-sized offsets, are left for subsequent patches.
SVE objects are allocated in the stack frame as a separate region below
the callee-save area, and above the alignment gap. This is done so that
the SVE objects can be accessed directly from the FP at (runtime)
VL-based offsets to benefit from using the VL-scaled addressing modes.
The layout looks as follows:
+-------------+
| stack arg |
+-------------+
| Callee Saves|
| X29, X30 | (if available)
|-------------| <- FP (if available)
| : |
| SVE area |
| : |
+-------------+
|/////////////| alignment gap.
| : |
| Stack objs |
| : |
+-------------+ <- SP after call and frame-setup
SVE and non-SVE stack objects are distinguished using different
StackIDs. The offsets for objects with TargetStackID::SVEVector should be
interpreted as purely scalable offsets within their respective SVE region.
Reviewers: thegameg, rovka, t.p.northover, efriedma, rengolin, greened
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D61437
llvm-svn: 373585
2019-10-03 19:33:50 +08:00
|
|
|
case TargetStackID::SVEVector:
|
|
|
|
return false;
|
2019-06-17 18:20:20 +08:00
|
|
|
}
|
|
|
|
llvm_unreachable("Invalid TargetStackID::Value");
|
2019-06-17 17:13:29 +08:00
|
|
|
}
|
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
// Activate all lanes, returns saved exec.
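// (S_OR_SAVEEXEC with an all-ones source copies the current EXEC mask into the
// destination SGPR(s) and then ORs EXEC with -1, i.e. enables every lane.)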
|
|
|
|
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
|
|
|
|
MachineFunction &MF,
|
|
|
|
MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI,
|
|
|
|
bool IsProlog) {
|
|
|
|
Register ScratchExecCopy;
|
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
const SIInstrInfo *TII = ST.getInstrInfo();
|
|
|
|
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
|
|
|
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
|
|
|
DebugLoc DL;
|
|
|
|
|
|
|
|
if (LiveRegs.empty()) {
|
|
|
|
if (IsProlog) {
|
|
|
|
LiveRegs.init(TRI);
|
|
|
|
LiveRegs.addLiveIns(MBB);
|
|
|
|
if (FuncInfo->SGPRForFPSaveRestoreCopy)
|
|
|
|
LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
|
2020-04-21 17:34:33 +08:00
|
|
|
|
|
|
|
if (FuncInfo->SGPRForBPSaveRestoreCopy)
|
|
|
|
LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy);
|
2020-05-08 05:56:37 +08:00
|
|
|
} else {
|
|
|
|
// In epilog.
|
|
|
|
LiveRegs.init(*ST.getRegisterInfo());
|
|
|
|
LiveRegs.addLiveOuts(MBB);
|
|
|
|
LiveRegs.stepBackward(*MBBI);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ScratchExecCopy = findScratchNonCalleeSaveRegister(
|
|
|
|
MRI, LiveRegs, *TRI.getWaveMaskRegClass());
|
|
|
|
|
|
|
|
if (!IsProlog)
|
|
|
|
LiveRegs.removeReg(ScratchExecCopy);
|
|
|
|
|
|
|
|
const unsigned OrSaveExec =
|
|
|
|
ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
|
|
|
|
|
|
|
|
return ScratchExecCopy;
|
|
|
|
}
|
|
|
|
|
2017-05-18 05:56:25 +08:00
|
|
|
void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|
|
|
MachineBasicBlock &MBB) const {
|
2018-03-30 05:30:06 +08:00
|
|
|
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
2017-06-27 01:53:59 +08:00
|
|
|
if (FuncInfo->isEntryFunction()) {
|
2017-05-18 05:56:25 +08:00
|
|
|
emitEntryFunctionPrologue(MF, MBB);
|
2017-06-27 01:53:59 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
2019-07-09 03:03:38 +08:00
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
2018-07-12 04:59:01 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
2017-06-27 01:53:59 +08:00
|
|
|
const SIInstrInfo *TII = ST.getInstrInfo();
|
2018-03-30 05:30:06 +08:00
|
|
|
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
2017-06-27 01:53:59 +08:00
|
|
|
|
2020-04-22 03:06:53 +08:00
|
|
|
Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
|
|
|
|
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
|
2020-04-21 17:34:33 +08:00
|
|
|
Register BasePtrReg =
|
|
|
|
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
|
2019-05-25 02:18:51 +08:00
|
|
|
LivePhysRegs LiveRegs;
|
2017-06-27 01:53:59 +08:00
|
|
|
|
|
|
|
MachineBasicBlock::iterator MBBI = MBB.begin();
|
|
|
|
DebugLoc DL;
|
|
|
|
|
2019-06-06 06:20:47 +08:00
|
|
|
bool HasFP = false;
|
2020-04-21 17:34:33 +08:00
|
|
|
bool HasBP = false;
|
2018-03-30 05:30:06 +08:00
|
|
|
uint32_t NumBytes = MFI.getStackSize();
|
|
|
|
uint32_t RoundedSize = NumBytes;
|
2019-07-09 03:03:38 +08:00
|
|
|
// To avoid clobbering VGPRs in lanes that weren't active on function entry,
|
|
|
|
// turn on all lanes before doing the spill to memory.
|
2020-04-22 03:06:53 +08:00
|
|
|
Register ScratchExecCopy;
|
2019-07-09 03:03:38 +08:00
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
|
|
|
|
bool SpillFPToMemory = false;
|
|
|
|
// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
|
|
|
|
// Otherwise we are spilling the FP to memory.
|
|
|
|
if (HasFPSaveIndex) {
|
|
|
|
SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
|
|
|
|
TargetStackID::SGPRSpill;
|
|
|
|
}
|
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
|
|
|
|
bool SpillBPToMemory = false;
|
|
|
|
// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
|
|
|
|
// Otherwise we are spilling the BP to memory.
|
|
|
|
if (HasBPSaveIndex) {
|
|
|
|
SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
|
|
|
|
TargetStackID::SGPRSpill;
|
|
|
|
}
|
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
// Emit the copy if we need an FP, and are using a free SGPR to save it.
|
2020-04-22 03:06:53 +08:00
|
|
|
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
|
2019-07-09 03:03:38 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
|
|
|
|
.addReg(FramePtrReg)
|
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
2020-04-21 17:34:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Emit the copy if we need a BP, and are using a free SGPR to save it.
|
|
|
|
if (FuncInfo->SGPRForBPSaveRestoreCopy) {
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
|
|
|
|
FuncInfo->SGPRForBPSaveRestoreCopy)
|
|
|
|
.addReg(BasePtrReg)
|
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
|
|
|
}
|
|
|
|
|
|
|
|
// If a copy has been emitted for FP and/or BP, make the SGPRs
|
|
|
|
// used in the copy instructions live throughout the function.
|
|
|
|
SmallVector<MCPhysReg, 2> TempSGPRs;
|
|
|
|
if (FuncInfo->SGPRForFPSaveRestoreCopy)
|
|
|
|
TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
|
|
|
|
|
|
|
|
if (FuncInfo->SGPRForBPSaveRestoreCopy)
|
|
|
|
TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
|
|
|
|
|
|
|
|
if (!TempSGPRs.empty()) {
|
|
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
|
|
for (MCPhysReg Reg : TempSGPRs)
|
|
|
|
MBB.addLiveIn(Reg);
|
|
|
|
|
|
|
|
MBB.sortUniqueLiveIns();
|
|
|
|
}
|
2019-07-09 03:03:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
|
|
|
|
: FuncInfo->getSGPRSpillVGPRs()) {
|
|
|
|
if (!Reg.FI.hasValue())
|
|
|
|
continue;
|
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
if (!ScratchExecCopy)
|
|
|
|
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
|
2019-07-09 03:03:38 +08:00
|
|
|
|
2020-10-22 05:27:03 +08:00
|
|
|
buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
|
2019-07-09 03:03:38 +08:00
|
|
|
FuncInfo->getScratchRSrcReg(),
|
|
|
|
StackPtrReg,
|
|
|
|
Reg.FI.getValue());
|
|
|
|
}
|
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
if (HasFPSaveIndex && SpillFPToMemory) {
|
2020-05-12 11:02:54 +08:00
|
|
|
assert(!MFI.isDeadObjectIndex(FuncInfo->FramePointerSaveIndex.getValue()));
|
2020-05-08 05:56:37 +08:00
|
|
|
|
|
|
|
if (!ScratchExecCopy)
|
|
|
|
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
|
|
|
|
|
|
|
|
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
|
|
|
|
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
|
|
|
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
|
|
|
|
.addReg(FramePtrReg);
|
|
|
|
|
2020-10-22 05:27:03 +08:00
|
|
|
buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
|
2020-05-08 05:56:37 +08:00
|
|
|
FuncInfo->getScratchRSrcReg(), StackPtrReg,
|
|
|
|
FuncInfo->FramePointerSaveIndex.getValue());
|
|
|
|
}
|
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
if (HasBPSaveIndex && SpillBPToMemory) {
|
|
|
|
assert(!MFI.isDeadObjectIndex(*FuncInfo->BasePointerSaveIndex));
|
|
|
|
|
|
|
|
if (!ScratchExecCopy)
|
|
|
|
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
|
|
|
|
|
|
|
|
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
|
|
|
|
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
|
|
|
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
|
|
|
|
.addReg(BasePtrReg);
|
|
|
|
|
2020-10-22 05:27:03 +08:00
|
|
|
buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
|
2020-04-21 17:34:33 +08:00
|
|
|
FuncInfo->getScratchRSrcReg(), StackPtrReg,
|
|
|
|
*FuncInfo->BasePointerSaveIndex);
|
|
|
|
}
|
|
|
|
|
2020-05-01 02:25:24 +08:00
|
|
|
if (ScratchExecCopy) {
|
2019-07-09 03:03:38 +08:00
|
|
|
// FIXME: Split block and make terminator.
|
|
|
|
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
|
2020-04-22 03:06:53 +08:00
|
|
|
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
|
2019-07-09 03:03:38 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
|
2020-05-08 05:56:37 +08:00
|
|
|
.addReg(ScratchExecCopy, RegState::Kill);
|
2019-07-09 03:03:38 +08:00
|
|
|
LiveRegs.addReg(ScratchExecCopy);
|
|
|
|
}
|
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
// In this case, spill the FP to a reserved VGPR.
|
|
|
|
if (HasFPSaveIndex && !SpillFPToMemory) {
|
2019-07-09 03:03:38 +08:00
|
|
|
const int FI = FuncInfo->FramePointerSaveIndex.getValue();
|
2020-05-08 05:56:37 +08:00
|
|
|
assert(!MFI.isDeadObjectIndex(FI));
|
|
|
|
|
|
|
|
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
|
|
|
|
ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
|
|
|
|
FuncInfo->getSGPRToVGPRSpills(FI);
|
2019-07-09 03:03:38 +08:00
|
|
|
assert(Spill.size() == 1);
|
|
|
|
|
|
|
|
// Save FP before setting it up.
|
|
|
|
// FIXME: This should respect spillSGPRToVGPR;
|
2020-10-29 20:10:56 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
|
2020-05-08 05:56:37 +08:00
|
|
|
.addReg(FramePtrReg)
|
|
|
|
.addImm(Spill[0].Lane)
|
|
|
|
.addReg(Spill[0].VGPR, RegState::Undef);
|
2019-07-09 03:03:38 +08:00
|
|
|
}
|
2018-03-30 05:30:06 +08:00
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
// In this case, spill the BP to a reserved VGPR.
|
|
|
|
if (HasBPSaveIndex && !SpillBPToMemory) {
|
|
|
|
const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
|
|
|
|
assert(!MFI.isDeadObjectIndex(BasePtrFI));
|
|
|
|
|
|
|
|
assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
|
|
|
|
ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
|
|
|
|
FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
|
|
|
|
assert(Spill.size() == 1);
|
|
|
|
|
|
|
|
// Save BP before setting it up.
|
|
|
|
// FIXME: This should respect spillSGPRToVGPR;
|
2020-10-29 20:10:56 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
|
2020-04-21 17:34:33 +08:00
|
|
|
.addReg(BasePtrReg)
|
|
|
|
.addImm(Spill[0].Lane)
|
|
|
|
.addReg(Spill[0].VGPR, RegState::Undef);
|
|
|
|
}
|
|
|
|
|
2019-06-06 06:20:47 +08:00
|
|
|
if (TRI.needsStackRealignment(MF)) {
|
|
|
|
HasFP = true;
|
[Alignment][NFC] Deprecate getMaxAlignment
Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790
Reviewers: courbet
Subscribers: jholewinski, arsenm, dschuff, jyknight, sdardis, nemanjai, jvesely, nhaehnle, sbc100, jgravelle-google, hiraditya, aheejin, kbarton, fedor.sergeev, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, PkmX, jocewei, Jim, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76348
2020-03-18 17:50:38 +08:00
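A minimal sketch of what the deprecation means for call sites (illustrative, not code from this file): a use such as
unsigned Alignment = MFI.getMaxAlignment();
migrates to the Align-typed accessor, as the line below does with MFI.getMaxAlign().value().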
|
|
|
const unsigned Alignment = MFI.getMaxAlign().value();
|
2018-03-30 05:30:06 +08:00
|
|
|
|
|
|
|
RoundedSize += Alignment;
|
2019-07-09 03:03:38 +08:00
|
|
|
if (LiveRegs.empty()) {
|
|
|
|
LiveRegs.init(TRI);
|
|
|
|
LiveRegs.addLiveIns(MBB);
|
|
|
|
LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
|
2020-04-21 17:34:33 +08:00
|
|
|
LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
|
2019-07-09 03:03:38 +08:00
|
|
|
}
|
2018-03-30 05:30:06 +08:00
|
|
|
|
2020-04-22 03:06:53 +08:00
|
|
|
Register ScratchSPReg = findScratchNonCalleeSaveRegister(
|
2019-07-09 03:03:38 +08:00
|
|
|
MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
|
2020-04-21 17:34:33 +08:00
|
|
|
assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy &&
|
|
|
|
ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy);
|
2018-03-30 05:30:06 +08:00
|
|
|
|
|
|
|
// s_add_u32 tmp_reg, s32, NumBytes
|
|
|
|
// s_and_b32 s32, tmp_reg, 0b111...0000
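// Worked example, assuming getScratchScaleFactor here is the 64-lane wave
// size: with a 16-byte MaxAlign the sequence adds 15 * 64 = 960 and then ANDs
// with -(16 * 64) = -1024, rounding the incoming swizzled stack-pointer value
// up to the next 16-byte-per-lane boundary; the result becomes the realigned
// frame pointer.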
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
|
2020-05-08 05:56:37 +08:00
|
|
|
.addReg(StackPtrReg)
|
2020-10-22 05:27:03 +08:00
|
|
|
.addImm((Alignment - 1) * getScratchScaleFactor(ST))
|
2020-05-08 05:56:37 +08:00
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
2018-03-30 05:30:06 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
|
2020-05-08 05:56:37 +08:00
|
|
|
.addReg(ScratchSPReg, RegState::Kill)
|
2020-10-22 05:27:03 +08:00
|
|
|
.addImm(-Alignment * getScratchScaleFactor(ST))
|
2020-05-08 05:56:37 +08:00
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
2018-03-30 05:30:06 +08:00
|
|
|
FuncInfo->setIsStackRealigned(true);
|
2019-06-06 06:20:47 +08:00
|
|
|
} else if ((HasFP = hasFP(MF))) {
|
2017-06-27 01:53:59 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
|
2020-05-08 05:56:37 +08:00
|
|
|
.addReg(StackPtrReg)
|
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
2017-06-27 01:53:59 +08:00
|
|
|
}
|
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
// If we need a base pointer, set it up here. It's whatever the value of
|
|
|
|
// the stack pointer is at this point. Any variable size objects will be
|
|
|
|
// allocated after this, so we can still use the base pointer to reference
|
|
|
|
// the incoming arguments.
|
|
|
|
if ((HasBP = TRI.hasBasePointer(MF))) {
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
|
|
|
|
.addReg(StackPtrReg)
|
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
|
|
|
}
|
|
|
|
|
2019-06-06 06:20:47 +08:00
|
|
|
if (HasFP && RoundedSize != 0) {
|
2017-06-27 01:53:59 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
|
2020-05-08 05:56:37 +08:00
|
|
|
.addReg(StackPtrReg)
|
2020-10-22 05:27:03 +08:00
|
|
|
.addImm(RoundedSize * getScratchScaleFactor(ST))
|
2020-05-08 05:56:37 +08:00
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
2017-06-27 01:53:59 +08:00
|
|
|
}
|
2017-08-02 09:52:45 +08:00
|
|
|
|
2020-04-22 03:06:53 +08:00
|
|
|
assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
|
2019-07-09 06:00:33 +08:00
|
|
|
FuncInfo->FramePointerSaveIndex)) &&
|
|
|
|
"Needed to save FP but didn't save it anywhere");
|
2019-05-29 00:46:02 +08:00
|
|
|
|
2020-04-22 03:06:53 +08:00
|
|
|
assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
|
2019-07-09 06:00:33 +08:00
|
|
|
!FuncInfo->FramePointerSaveIndex)) &&
|
|
|
|
"Saved FP but didn't need it");
|
2020-04-21 17:34:33 +08:00
|
|
|
|
|
|
|
assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
|
|
|
|
FuncInfo->BasePointerSaveIndex)) &&
|
|
|
|
"Needed to save BP but didn't save it anywhere");
|
|
|
|
|
|
|
|
assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
|
|
|
|
!FuncInfo->BasePointerSaveIndex)) &&
|
|
|
|
"Saved BP but didn't need it");
|
2017-05-18 05:56:25 +08:00
|
|
|
}
|
|
|
|
|
2016-06-24 14:30:11 +08:00
|
|
|
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|
|
|
MachineBasicBlock &MBB) const {
|
2017-06-27 01:53:59 +08:00
|
|
|
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
|
|
|
if (FuncInfo->isEntryFunction())
|
|
|
|
return;
|
2016-06-24 14:30:11 +08:00
|
|
|
|
2018-07-12 04:59:01 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
2017-08-02 09:52:45 +08:00
|
|
|
const SIInstrInfo *TII = ST.getInstrInfo();
|
2019-07-09 03:03:38 +08:00
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
2020-04-21 17:34:33 +08:00
|
|
|
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
2017-08-02 09:52:45 +08:00
|
|
|
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
|
2019-07-09 03:03:38 +08:00
|
|
|
LivePhysRegs LiveRegs;
|
2019-05-25 02:18:51 +08:00
|
|
|
DebugLoc DL;
|
2017-08-02 09:52:45 +08:00
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
|
|
uint32_t NumBytes = MFI.getStackSize();
|
[Alignment][NFC] Deprecate getMaxAlignment
Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790
Reviewers: courbet
Subscribers: jholewinski, arsenm, dschuff, jyknight, sdardis, nemanjai, jvesely, nhaehnle, sbc100, jgravelle-google, hiraditya, aheejin, kbarton, fedor.sergeev, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, PkmX, jocewei, Jim, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76348
2020-03-18 17:50:38 +08:00
|
|
|
uint32_t RoundedSize = FuncInfo->isStackRealigned()
|
|
|
|
? NumBytes + MFI.getMaxAlign().value()
|
|
|
|
: NumBytes;
|
2020-05-08 05:56:37 +08:00
|
|
|
const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
|
|
|
|
const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
|
2020-04-21 17:34:33 +08:00
|
|
|
const Register BasePtrReg =
|
|
|
|
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
|
2020-05-08 05:56:37 +08:00
|
|
|
|
|
|
|
bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
|
|
|
|
bool SpillFPToMemory = false;
|
|
|
|
if (HasFPSaveIndex) {
|
|
|
|
SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
|
|
|
|
TargetStackID::SGPRSpill;
|
|
|
|
}
|
2019-07-09 03:03:38 +08:00
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
|
|
|
|
bool SpillBPToMemory = false;
|
|
|
|
if (HasBPSaveIndex) {
|
|
|
|
SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
|
|
|
|
TargetStackID::SGPRSpill;
|
|
|
|
}
|
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
if (RoundedSize != 0 && hasFP(MF)) {
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
|
|
|
|
.addReg(StackPtrReg)
|
2020-10-22 05:27:03 +08:00
|
|
|
.addImm(RoundedSize * getScratchScaleFactor(ST))
|
2019-07-09 03:03:38 +08:00
|
|
|
.setMIFlag(MachineInstr::FrameDestroy);
|
|
|
|
}
|
|
|
|
|
2020-04-22 03:06:53 +08:00
|
|
|
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
|
2020-05-08 05:56:37 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
|
|
|
|
.addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
|
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
2019-07-09 03:03:38 +08:00
|
|
|
}
|
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
if (FuncInfo->SGPRForBPSaveRestoreCopy) {
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
|
|
|
|
.addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
|
|
|
|
.setMIFlag(MachineInstr::FrameSetup);
|
|
|
|
}
|
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
Register ScratchExecCopy;
|
|
|
|
if (HasFPSaveIndex) {
|
2019-07-09 03:03:38 +08:00
|
|
|
const int FI = FuncInfo->FramePointerSaveIndex.getValue();
|
2020-05-08 05:56:37 +08:00
|
|
|
assert(!MFI.isDeadObjectIndex(FI));
|
|
|
|
if (SpillFPToMemory) {
|
|
|
|
if (!ScratchExecCopy)
|
|
|
|
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
|
|
|
|
|
|
|
|
MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
|
|
|
|
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
2020-10-22 05:27:03 +08:00
|
|
|
buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
|
2020-05-08 05:56:37 +08:00
|
|
|
FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
|
|
|
|
.addReg(TempVGPR, RegState::Kill);
|
|
|
|
} else {
|
|
|
|
// Reload from VGPR spill.
|
|
|
|
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
|
|
|
|
ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
|
|
|
|
FuncInfo->getSGPRToVGPRSpills(FI);
|
|
|
|
assert(Spill.size() == 1);
|
2020-10-29 20:10:56 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
|
2020-05-08 05:56:37 +08:00
|
|
|
.addReg(Spill[0].VGPR)
|
|
|
|
.addImm(Spill[0].Lane);
|
|
|
|
}
|
2019-07-09 03:03:38 +08:00
|
|
|
}
|
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
if (HasBPSaveIndex) {
|
|
|
|
const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
|
|
|
|
assert(!MFI.isDeadObjectIndex(BasePtrFI));
|
|
|
|
if (SpillBPToMemory) {
|
|
|
|
if (!ScratchExecCopy)
|
|
|
|
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
|
|
|
|
|
|
|
|
MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
|
|
|
|
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
2020-10-22 05:27:03 +08:00
|
|
|
buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
|
2020-04-21 17:34:33 +08:00
|
|
|
FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
|
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
|
|
|
|
.addReg(TempVGPR, RegState::Kill);
|
|
|
|
} else {
|
|
|
|
// Reload from VGPR spill.
|
|
|
|
assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
|
|
|
|
ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
|
|
|
|
FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
|
|
|
|
assert(Spill.size() == 1);
|
2020-10-29 20:10:56 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
|
2020-04-21 17:34:33 +08:00
|
|
|
.addReg(Spill[0].VGPR)
|
|
|
|
.addImm(Spill[0].Lane);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg :
|
|
|
|
FuncInfo->getSGPRSpillVGPRs()) {
|
2019-05-29 00:46:02 +08:00
|
|
|
if (!Reg.FI.hasValue())
|
|
|
|
continue;
|
2019-05-25 02:18:51 +08:00
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
if (!ScratchExecCopy)
|
|
|
|
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
|
2019-05-25 02:18:51 +08:00
|
|
|
|
2020-10-22 05:27:03 +08:00
|
|
|
buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
|
2020-05-08 05:56:37 +08:00
|
|
|
FuncInfo->getScratchRSrcReg(), StackPtrReg,
|
|
|
|
Reg.FI.getValue());
|
2019-05-29 00:46:02 +08:00
|
|
|
}
|
|
|
|
|
2020-05-01 02:25:24 +08:00
|
|
|
if (ScratchExecCopy) {
|
2019-05-25 02:18:51 +08:00
|
|
|
// FIXME: Split block and make terminator.
|
2019-06-17 01:13:09 +08:00
|
|
|
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
|
2020-04-08 04:33:58 +08:00
|
|
|
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
|
2019-06-17 01:13:09 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
|
2020-05-08 05:56:37 +08:00
|
|
|
.addReg(ScratchExecCopy, RegState::Kill);
|
2017-06-27 01:53:59 +08:00
|
|
|
}
|
2016-06-24 14:30:11 +08:00
|
|
|
}
|
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
#ifndef NDEBUG
|
2020-08-20 22:18:11 +08:00
|
|
|
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
|
|
|
|
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
|
|
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
2019-07-09 03:03:38 +08:00
|
|
|
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
|
|
|
|
I != E; ++I) {
|
|
|
|
if (!MFI.isDeadObjectIndex(I) &&
|
|
|
|
MFI.getStackID(I) == TargetStackID::SGPRSpill &&
|
2020-08-20 22:18:11 +08:00
|
|
|
(I != FuncInfo->FramePointerSaveIndex &&
|
|
|
|
I != FuncInfo->BasePointerSaveIndex)) {
|
2019-07-09 03:03:38 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2020-11-04 16:56:54 +08:00
|
|
|
StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
|
|
|
|
int FI,
|
|
|
|
Register &FrameReg) const {
|
2018-07-12 04:59:01 +08:00
|
|
|
const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
|
2017-03-11 03:39:07 +08:00
|
|
|
|
|
|
|
FrameReg = RI->getFrameRegister(MF);
|
2020-11-04 16:56:54 +08:00
|
|
|
return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
|
2017-03-11 03:39:07 +08:00
|
|
|
}
|
|
|
|
|
2015-11-07 02:17:45 +08:00
|
|
|
void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
|
|
|
MachineFunction &MF,
|
|
|
|
RegScavenger *RS) const {
|
2016-07-29 02:40:00 +08:00
|
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
2015-12-01 05:15:53 +08:00
|
|
|
|
2018-07-12 04:59:01 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
2019-07-09 03:03:38 +08:00
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
2017-02-23 06:23:32 +08:00
|
|
|
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
|
|
|
|
2019-07-12 05:54:13 +08:00
|
|
|
FuncInfo->removeDeadFrameIndices(MFI);
|
2020-08-20 22:18:11 +08:00
|
|
|
assert(allSGPRSpillsAreDead(MF) &&
|
2019-07-09 03:03:38 +08:00
|
|
|
"SGPR spill should have been removed in SILowerSGPRSpills");
|
2019-04-02 17:46:52 +08:00
|
|
|
|
2019-07-04 07:32:29 +08:00
|
|
|
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
|
|
|
|
// but currently hasNonSpillStackObjects is set only from source
|
|
|
|
// allocas. Stack temps produced from legalization are not counted currently.
|
|
|
|
if (!allStackObjectsAreDead(MFI)) {
|
2017-02-23 06:23:32 +08:00
|
|
|
assert(RS && "RegScavenger required if spilling");
|
|
|
|
|
2019-06-06 06:37:50 +08:00
|
|
|
if (FuncInfo->isEntryFunction()) {
|
|
|
|
int ScavengeFI = MFI.CreateFixedObject(
|
2019-07-09 03:03:38 +08:00
|
|
|
TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
|
2019-06-06 06:37:50 +08:00
|
|
|
RS->addScavengingFrameIndex(ScavengeFI);
|
|
|
|
} else {
|
|
|
|
int ScavengeFI = MFI.CreateStackObject(
|
2020-07-01 15:28:11 +08:00
|
|
|
TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
|
|
|
|
TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false);
|
2019-06-06 06:37:50 +08:00
|
|
|
RS->addScavengingFrameIndex(ScavengeFI);
|
|
|
|
}
|
2017-02-23 05:05:25 +08:00
|
|
|
}
|
2015-11-07 02:17:45 +08:00
|
|
|
}
|
2016-06-25 11:11:28 +08:00
|
|
|
|
2019-07-04 07:32:29 +08:00
|
|
|
// Only report VGPRs to generic code.
|
|
|
|
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
2019-07-09 03:03:38 +08:00
|
|
|
BitVector &SavedVGPRs,
|
2017-09-14 07:47:01 +08:00
|
|
|
RegScavenger *RS) const {
|
2019-07-09 03:03:38 +08:00
|
|
|
TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
|
|
|
|
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
2019-07-12 07:53:30 +08:00
|
|
|
if (MFI->isEntryFunction())
|
|
|
|
return;
|
|
|
|
|
2020-05-08 05:56:37 +08:00
|
|
|
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
2019-07-04 07:32:29 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
// Ignore the SGPRs the default implementation found.
|
|
|
|
SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
|
|
|
|
|
|
|
|
// hasFP only knows about stack objects that already exist. We're now
|
|
|
|
// determining the stack slots that will be created, so we have to predict
|
|
|
|
// them. Stack objects force FP usage with calls.
|
|
|
|
//
|
|
|
|
// Note a new VGPR CSR may be introduced if one is used for the spill, but we
|
|
|
|
// don't want to report it here.
|
|
|
|
//
|
|
|
|
// FIXME: Is this really hasReservedCallFrame?
|
|
|
|
const bool WillHaveFP =
|
|
|
|
FrameInfo.hasCalls() &&
|
|
|
|
(SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
|
|
|
|
|
|
|
|
// VGPRs used for SGPR spilling need to be specially inserted in the prolog,
|
|
|
|
// so don't allow the default insertion to handle them.
|
2019-07-04 07:32:29 +08:00
|
|
|
for (auto SSpill : MFI->getSGPRSpillVGPRs())
|
2019-07-09 03:03:38 +08:00
|
|
|
SavedVGPRs.reset(SSpill.VGPR);
|
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
LivePhysRegs LiveRegs;
|
|
|
|
LiveRegs.init(*TRI);
|
2019-07-09 03:03:38 +08:00
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
if (WillHaveFP || hasFP(MF)) {
|
|
|
|
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
|
|
|
|
MFI->FramePointerSaveIndex, true);
|
2019-07-09 03:03:38 +08:00
|
|
|
}
|
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
if (TRI->hasBasePointer(MF)) {
|
|
|
|
if (MFI->SGPRForFPSaveRestoreCopy)
|
|
|
|
LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
|
|
|
|
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
|
|
|
|
MFI->BasePointerSaveIndex, false);
|
2019-07-09 03:03:38 +08:00
|
|
|
}
|
2019-07-04 07:32:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
|
|
|
|
BitVector &SavedRegs,
|
|
|
|
RegScavenger *RS) const {
|
|
|
|
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
|
|
|
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
2019-07-12 07:53:30 +08:00
|
|
|
if (MFI->isEntryFunction())
|
|
|
|
return;
|
2019-07-04 07:32:29 +08:00
|
|
|
|
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
2017-09-14 07:47:01 +08:00
|
|
|
|
|
|
|
// The SP is specifically managed and we don't want extra spills of it.
|
|
|
|
SavedRegs.reset(MFI->getStackPtrOffsetReg());
|
2019-07-04 07:32:29 +08:00
|
|
|
SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
|
2017-09-14 07:47:01 +08:00
|
|
|
}
|
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
bool SIFrameLowering::assignCalleeSavedSpillSlots(
|
|
|
|
MachineFunction &MF, const TargetRegisterInfo *TRI,
|
|
|
|
std::vector<CalleeSavedInfo> &CSI) const {
|
|
|
|
if (CSI.empty())
|
|
|
|
return true; // Early exit if no callee saved registers are modified!
|
|
|
|
|
|
|
|
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
2020-04-21 17:34:33 +08:00
|
|
|
if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
|
|
|
|
!FuncInfo->SGPRForBPSaveRestoreCopy)
|
2019-07-09 03:03:38 +08:00
|
|
|
return false;
|
|
|
|
|
2020-04-21 17:34:33 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
const SIRegisterInfo *RI = ST.getRegisterInfo();
|
|
|
|
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
|
|
|
|
Register BasePtrReg = RI->getBaseRegister();
|
|
|
|
unsigned NumModifiedRegs = 0;
|
|
|
|
|
|
|
|
if (FuncInfo->SGPRForFPSaveRestoreCopy)
|
|
|
|
NumModifiedRegs++;
|
|
|
|
if (FuncInfo->SGPRForBPSaveRestoreCopy)
|
|
|
|
NumModifiedRegs++;
|
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
for (auto &CS : CSI) {
|
2020-04-21 17:34:33 +08:00
|
|
|
if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
|
|
|
|
CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
|
|
|
|
if (--NumModifiedRegs)
|
|
|
|
break;
|
|
|
|
} else if (CS.getReg() == BasePtrReg &&
|
|
|
|
FuncInfo->SGPRForBPSaveRestoreCopy) {
|
|
|
|
CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
|
|
|
|
if (--NumModifiedRegs)
|
|
|
|
break;
|
2019-07-09 03:03:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-08-02 03:54:18 +08:00
|
|
|
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
|
|
|
|
MachineFunction &MF,
|
|
|
|
MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator I) const {
|
|
|
|
int64_t Amount = I->getOperand(0).getImm();
|
|
|
|
if (Amount == 0)
|
|
|
|
return MBB.erase(I);
|
|
|
|
|
2018-07-12 04:59:01 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
2017-08-02 03:54:18 +08:00
|
|
|
const SIInstrInfo *TII = ST.getInstrInfo();
|
|
|
|
const DebugLoc &DL = I->getDebugLoc();
|
|
|
|
unsigned Opc = I->getOpcode();
|
|
|
|
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
|
|
|
|
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
|
|
|
|
|
2019-06-26 04:53:35 +08:00
|
|
|
if (!hasReservedCallFrame(MF)) {
|
[Alignment][NFC] Use llvm::TargetFrameLowering::getStackAlign
Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790
Reviewers: courbet
Reviewed By: courbet
Subscribers: wuzish, arsenm, jyknight, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, fedor.sergeev, jrtc27, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76613
2020-03-27 01:51:25 +08:00
|
|
|
Amount = alignTo(Amount, getStackAlign());
|
2017-08-02 03:54:18 +08:00
|
|
|
assert(isUInt<32>(Amount) && "exceeded stack address space size");
|
|
|
|
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
2020-04-22 03:06:53 +08:00
|
|
|
Register SPReg = MFI->getStackPtrOffsetReg();
|
2017-08-02 03:54:18 +08:00
|
|
|
|
|
|
|
unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
|
|
|
|
BuildMI(MBB, I, DL, TII->get(Op), SPReg)
|
|
|
|
.addReg(SPReg)
|
2020-10-22 05:27:03 +08:00
|
|
|
.addImm(Amount * getScratchScaleFactor(ST));
|
2017-08-02 03:54:18 +08:00
|
|
|
} else if (CalleePopAmount != 0) {
|
|
|
|
llvm_unreachable("is this used?");
|
|
|
|
}
|
|
|
|
|
|
|
|
return MBB.erase(I);
|
|
|
|
}
|
|
|
|
|
2020-07-29 05:36:14 +08:00
|
|
|
/// Returns true if the frame will require a reference to the stack pointer.
|
|
|
|
///
|
|
|
|
/// This is the set of conditions common to setting up the stack pointer in a
|
|
|
|
/// kernel, and for using a frame pointer in a callable function.
|
|
|
|
///
|
|
|
|
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
|
|
|
|
/// references SP.
|
|
|
|
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
|
|
|
|
return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
|
|
|
|
}
|
|
|
|
|
|
|
|
// The FP for kernels is always known 0, so we never really need to setup an
|
|
|
|
// explicit register for it. However, DisableFramePointerElim will force us to
|
|
|
|
// use a register for it.
|
2017-06-27 01:53:59 +08:00
|
|
|
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
|
|
|
|
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
2020-01-22 06:27:57 +08:00
|
|
|
|
|
|
|
// For entry functions we can use an immediate offset in most cases, so the
|
|
|
|
// presence of calls doesn't imply we need a distinct frame pointer.
|
|
|
|
if (MFI.hasCalls() &&
|
|
|
|
!MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
|
2019-06-06 06:20:47 +08:00
|
|
|
// All offsets are unsigned, so need to be addressed in the same direction
|
|
|
|
// as stack growth.
|
2019-07-09 03:03:38 +08:00
|
|
|
|
|
|
|
// FIXME: This function is pretty broken, since it can be called before the
|
|
|
|
// frame layout is determined or CSR spills are inserted.
|
2020-01-22 06:27:57 +08:00
|
|
|
return MFI.getStackSize() != 0;
|
2019-06-06 06:20:47 +08:00
|
|
|
}
|
2017-06-27 01:53:59 +08:00
|
|
|
|
2020-07-29 05:36:14 +08:00
|
|
|
return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
|
2019-06-21 01:03:23 +08:00
|
|
|
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
|
|
|
|
MF.getTarget().Options.DisableFramePointerElim(MF);
|
2017-06-27 01:53:59 +08:00
|
|
|
}
|
2020-07-29 05:36:14 +08:00
|
|
|
|
|
|
|
// This is essentially a reduced version of hasFP for entry functions. Since the
|
|
|
|
// stack pointer is known 0 on entry to kernels, we never really need an FP
|
|
|
|
// register. We may need to initialize the stack pointer depending on the frame
|
|
|
|
// properties, which logically overlaps many of the cases where an ordinary
|
|
|
|
// function would require an FP.
|
|
|
|
bool SIFrameLowering::requiresStackPointerReference(
|
|
|
|
const MachineFunction &MF) const {
|
|
|
|
// Callable functions always require a stack pointer reference.
|
|
|
|
assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
|
|
|
|
"only expected to call this for entry points");
|
|
|
|
|
|
|
|
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
|
|
|
|
|
|
// Entry points ordinarily don't need to initialize SP. We have to set it up
|
|
|
|
// for callees if there are any. Also note tail calls are impossible/don't
|
|
|
|
// make any sense for kernels.
|
|
|
|
if (MFI.hasCalls())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// We still need to initialize the SP if we're doing anything weird that
|
|
|
|
// references the SP, like variable sized stack objects.
|
|
|
|
return frameTriviallyRequiresSP(MFI);
|
|
|
|
}
|