2017-08-08 08:47:13 +08:00
|
|
|
//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "SIMachineFunctionInfo.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "AMDGPUArgumentUsageInfo.h"
|
2020-03-12 04:13:52 +08:00
|
|
|
#include "AMDGPUTargetMachine.h"
|
2014-09-24 09:33:17 +08:00
|
|
|
#include "AMDGPUSubtarget.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "SIRegisterInfo.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "Utils/AMDGPUBaseInfo.h"
|
|
|
|
#include "llvm/ADT/Optional.h"
|
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
2014-08-22 04:40:54 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2013-11-28 05:23:35 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "llvm/IR/CallingConv.h"
|
2014-05-02 23:41:42 +08:00
|
|
|
#include "llvm/IR/Function.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include <cassert>
|
|
|
|
#include <vector>
|
2013-11-28 05:23:35 +08:00
|
|
|
|
|
|
|
#define MAX_LANES 64
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
// Initialize the per-function SI state from the IR function's calling
// convention and its "amdgpu-*" attributes: which preloaded argument
// registers (user/system SGPRs, work-item ID VGPRs) are needed, and the
// scratch/stack register configuration for non-entry functions.
// NOTE: the order of the checks below matters — later blocks refine flags
// set by earlier ones.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    // 0xffffffff marks "unknown"; overridden below by the
    // "amdgpu-git-ptr-high" attribute when present.
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    // Kernels with no arguments don't need the kernarg segment pointer.
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    // Entry functions access implicit arguments through the kernarg segment,
    // so requesting the implicit-arg pointer implies the kernarg pointer and
    // may raise the required kernarg alignment.
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    // Fixed ABI: all ID inputs are unconditionally enabled.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    // Otherwise each input is opt-in via its attribute.
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    if (!ST.enableFlatScratch())
      PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch())) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects || ST.enableFlatScratch())
      FlatScratchInit = true;
  }

  // Parse the optional numeric attributes; each is left at its default when
  // the attribute is absent or empty.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}
|
|
|
|
|
2018-05-31 13:36:04 +08:00
|
|
|
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
|
|
|
|
limitOccupancy(getMaxWavesPerEU());
|
2018-07-12 04:59:01 +08:00
|
|
|
const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
|
2018-05-31 13:36:04 +08:00
|
|
|
limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
|
|
|
|
MF.getFunction()));
|
|
|
|
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
// Allocate the next four user SGPRs (a 128-bit SGPR tuple) to hold the
// private segment buffer resource descriptor.
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  const Register RSrc = TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
  ArgInfo.PrivateSegmentBuffer = ArgDescriptor::createRegister(RSrc);
  NumUserSGPRs += 4; // Descriptor occupies four SGPRs.
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
// Allocate a 64-bit user SGPR pair for the dispatch packet pointer.
Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  const Register PairReg = TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(PairReg);
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
// Allocate a 64-bit user SGPR pair for the queue pointer.
Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  const Register PairReg = TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(PairReg);
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}
|
2014-08-22 04:40:54 +08:00
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
// Allocate a 64-bit user SGPR pair for the kernarg segment pointer.
Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  const Register PairReg = TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  ArgInfo.KernargSegmentPtr = ArgDescriptor::createRegister(PairReg);
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
// Allocate a 64-bit user SGPR pair for the dispatch ID.
Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  const Register PairReg = TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  ArgInfo.DispatchID = ArgDescriptor::createRegister(PairReg);
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
// Allocate a 64-bit user SGPR pair for the flat-scratch init value.
Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  const Register PairReg = TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(PairReg);
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
// Allocate a 64-bit user SGPR pair for the implicit buffer pointer.
Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  const Register PairReg = TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(PairReg);
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}
|
|
|
|
|
[AMDGPU] Reserving VGPR for future SGPR Spill
Summary: One VGPR register is allocated to handle a future spill of SGPR if "--amdgpu-reserve-vgpr-for-sgpr-spill" option is used
Reviewers: arsenm, rampitec, msearles, cdevadas
Reviewed By: arsenm
Subscribers: madhur13490, qcolombet, kerbowa, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #amdgpu, #llvm
Differential Revision: https://reviews.llvm.org/D70379
2020-04-10 15:55:11 +08:00
|
|
|
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
|
|
|
|
MCPhysReg Reg) {
|
2017-08-02 09:52:45 +08:00
|
|
|
for (unsigned I = 0; CSRegs[I]; ++I) {
|
|
|
|
if (CSRegs[I] == Reg)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
/// \p returns true if \p NumLanes slots are available in VGPRs already used for
|
|
|
|
/// SGPR spilling.
|
|
|
|
//
|
|
|
|
// FIXME: This only works after processFunctionBeforeFrameFinalized
|
|
|
|
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
|
|
|
|
unsigned NumNeed) const {
|
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
unsigned WaveSize = ST.getWavefrontSize();
|
|
|
|
return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
|
|
|
|
}
|
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
///
/// Each SGPR of the spill occupies one lane of a spill VGPR; a new VGPR is
/// claimed whenever all lanes of the current one are exhausted. Returns
/// false (and rolls back its bookkeeping) if no VGPR can be found.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // One lane (4 bytes) per spilled SGPR.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
    // when one of the two conditions is true:
    // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    //    reserved.
    // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
    //    required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I; // Undo the lanes consumed so far by this FI.
        return false;
      }

      // If the chosen VGPR is callee-saved in a function that makes calls (or
      // is itself not an entry point), reserve a stack slot to preserve it.
      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Current spill VGPR still has free lanes; keep packing into it.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
|
|
|
|
|
[AMDGPU] Reserving VGPR for future SGPR Spill
Summary: One VGPR register is allocated to handle a future spill of SGPR if "--amdgpu-reserve-vgpr-for-sgpr-spill" option is used
Reviewers: arsenm, rampitec, msearles, cdevadas
Reviewed By: arsenm
Subscribers: madhur13490, qcolombet, kerbowa, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #amdgpu, #llvm
Differential Revision: https://reviews.llvm.org/D70379
2020-04-10 15:55:11 +08:00
|
|
|
/// Reserve a VGPR for spilling of SGPRs
|
|
|
|
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
|
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
|
|
|
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
|
|
|
|
|
|
|
|
Register LaneVGPR = TRI->findUnusedRegister(
|
|
|
|
MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
|
2020-10-16 06:53:56 +08:00
|
|
|
if (LaneVGPR == Register())
|
|
|
|
return false;
|
[AMDGPU] Reserving VGPR for future SGPR Spill
Summary: One VGPR register is allocated to handle a future spill of SGPR if "--amdgpu-reserve-vgpr-for-sgpr-spill" option is used
Reviewers: arsenm, rampitec, msearles, cdevadas
Reviewed By: arsenm
Subscribers: madhur13490, qcolombet, kerbowa, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #amdgpu, #llvm
Differential Revision: https://reviews.llvm.org/D70379
2020-04-10 15:55:11 +08:00
|
|
|
SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
|
|
|
|
FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-07-12 05:54:13 +08:00
|
|
|
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR to vice versa.
/// Returns true if a \p FI can be eliminated completely.
///
/// \p isAGPRtoVGPR selects the destination class: true spills AGPRs into
/// VGPR lanes, false spills VGPRs into AGPR lanes.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One 32-bit register ("lane") per 4 bytes of the spill slot.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  // Exclude call-preserved registers from the candidate set.
  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Walk the class's register list once, picking the next free register for
  // each lane; registers already allocated, used, or excluded are skipped.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
|
|
|
|
|
|
|
|
// Delete the stack objects of SGPR spills that have been fully lowered to
// VGPR lanes, plus fully allocated VGPR<->AGPR spill slots, and move any
// remaining SGPR-spill objects back onto the default stack.
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  auto IsFPOrBPSave = [this](int FI) {
    return FI == FramePointerSaveIndex || FI == BasePointerSaveIndex;
  };

  for (auto &Entry : SGPRToVGPRSpills)
    if (!IsFPOrBPSave(Entry.first))
      MFI.RemoveStackObject(Entry.first);

  // All other SPGRs must be allocated on the default stack, so reset the stack
  // ID.
  const int End = MFI.getObjectIndexEnd();
  for (int FI = MFI.getObjectIndexBegin(); FI != End; ++FI)
    if (!IsFPOrBPSave(FI))
      MFI.setStackID(FI, TargetStackID::Default);

  for (auto &Entry : VGPRToAGPRSpills)
    if (Entry.second.FullyAllocated)
      MFI.RemoveStackObject(Entry.first);
}
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
|
|
|
|
// User SGPRs are allocated from SGPR0 upward, and system SGPRs must come
// after them, so this is only meaningful before any system SGPR is added.
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  const unsigned NextIdx = NumUserSGPRs;
  return AMDGPU::SGPR0 + NextIdx;
}
|
|
|
|
|
|
|
|
// System SGPRs are laid out immediately after all user SGPRs.
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  const unsigned Offset = NumUserSGPRs + NumSystemSGPRs;
  return AMDGPU::SGPR0 + Offset;
}
|
2019-03-15 06:54:43 +08:00
|
|
|
|
2020-05-01 02:25:24 +08:00
|
|
|
// Return the register carrying the low half of the GIT address on amdpal,
// or an invalid Register elsewhere.
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();

  if (ST.hasMergedShaders()) {
    const CallingConv::ID CC = MF.getFunction().getCallingConv();
    if (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS) {
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      return AMDGPU::SGPR8;
    }
  }

  return AMDGPU::SGPR0; // Low GIT address passed in
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
// Render \p Reg as its printable name, wrapped in a YAML string value.
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  raw_string_ostream OS(Dest.Value);
  OS << printReg(Reg, &TRI);
  OS.flush(); // Ensure the buffered text is committed to Dest.Value.
  return Dest;
}
|
|
|
|
|
[AMDGPU] Enable serializing of argument info.
Summary:
- Support serialization of all arguments in machine function info. This
enables fabricating MIR tests depending on argument info.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64096
llvm-svn: 364995
2019-07-03 10:00:21 +08:00
|
|
|
/// Convert the in-memory argument usage info \p ArgInfo into its YAML
/// serialization form. Returns None when no argument is set, so the field
/// is omitted entirely from the MIR output.
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  // Converts one descriptor; returns true if it was set (used to decide
  // whether any argument info needs to be emitted at all).
  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}
|
|
|
|
|
2019-03-15 06:54:43 +08:00
|
|
|
// Build the YAML-serializable mirror of an SIMachineFunctionInfo: scalar
// fields are copied directly, registers are converted to their printable
// names, and the argument table is converted to its optional YAML form.
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
}
|
2019-03-15 06:54:43 +08:00
|
|
|
|
|
|
|
// Hook called by the YAML I/O machinery; delegates to the
// MappingTraits specialization that defines the key/value mapping.
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
|
|
|
|
|
|
|
|
// Copy the scalar (non-register, non-argument) fields from the parsed YAML
// representation into this MachineFunctionInfo. Always returns false here;
// presumably the return value is an error flag for the MIR parser — the
// register/argument fields are handled by a separate code path.
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  // The YAML stores a raw integer; interpret it as an Align value.
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  return false;
}
|
[AMDGPU] Reserving VGPR for future SGPR Spill
Summary: One VGPR register is allocated to handle a future spill of SGPR if "--amdgpu-reserve-vgpr-for-sgpr-spill" option is used
Reviewers: arsenm, rampitec, msearles, cdevadas
Reviewed By: arsenm
Subscribers: madhur13490, qcolombet, kerbowa, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #amdgpu, #llvm
Differential Revision: https://reviews.llvm.org/D70379
2020-04-10 15:55:11 +08:00
|
|
|
|
|
|
|
// Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
|
|
|
|
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
|
|
|
|
MachineFunction &MF) {
|
|
|
|
for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
|
|
|
|
if (i->VGPR == ReservedVGPR) {
|
|
|
|
SpillVGPRs.erase(i);
|
|
|
|
|
|
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
|
|
MBB.removeLiveIn(ReservedVGPR);
|
|
|
|
MBB.sortUniqueLiveIns();
|
|
|
|
}
|
|
|
|
this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|