2017-08-08 08:47:13 +08:00
|
|
|
//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "SIMachineFunctionInfo.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "AMDGPUArgumentUsageInfo.h"
|
2020-03-12 04:13:52 +08:00
|
|
|
#include "AMDGPUTargetMachine.h"
|
2014-09-24 09:33:17 +08:00
|
|
|
#include "AMDGPUSubtarget.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "SIRegisterInfo.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "Utils/AMDGPUBaseInfo.h"
|
|
|
|
#include "llvm/ADT/Optional.h"
|
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
2014-08-22 04:40:54 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2013-11-28 05:23:35 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include "llvm/IR/CallingConv.h"
|
2014-05-02 23:41:42 +08:00
|
|
|
#include "llvm/IR/Function.h"
|
2017-08-08 08:47:13 +08:00
|
|
|
#include <cassert>
|
|
|
|
#include <vector>
|
2013-11-28 05:23:35 +08:00
|
|
|
|
|
|
|
#define MAX_LANES 64
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
2013-04-02 05:47:53 +08:00
|
|
|
: AMDGPUMachineFunction(MF),
|
2015-12-01 05:16:03 +08:00
|
|
|
PrivateSegmentBuffer(false),
|
2015-11-26 04:55:12 +08:00
|
|
|
DispatchPtr(false),
|
|
|
|
QueuePtr(false),
|
2015-12-01 05:16:03 +08:00
|
|
|
KernargSegmentPtr(false),
|
2016-07-23 01:01:30 +08:00
|
|
|
DispatchID(false),
|
2015-11-26 04:55:12 +08:00
|
|
|
FlatScratchInit(false),
|
2016-04-15 00:27:03 +08:00
|
|
|
WorkGroupIDX(false),
|
2015-11-26 04:55:12 +08:00
|
|
|
WorkGroupIDY(false),
|
|
|
|
WorkGroupIDZ(false),
|
|
|
|
WorkGroupInfo(false),
|
2015-12-01 05:16:03 +08:00
|
|
|
PrivateSegmentWaveByteOffset(false),
|
2016-04-15 00:27:03 +08:00
|
|
|
WorkItemIDX(false),
|
2015-11-26 04:55:12 +08:00
|
|
|
WorkItemIDY(false),
|
2017-01-25 09:25:13 +08:00
|
|
|
WorkItemIDZ(false),
|
2017-08-04 07:12:44 +08:00
|
|
|
ImplicitBufferPtr(false),
|
2017-09-29 17:49:35 +08:00
|
|
|
ImplicitArgPtr(false),
|
2018-02-10 00:57:57 +08:00
|
|
|
GITPtrHigh(0xffffffff),
|
AMDGPU: Support GDS atomics
Summary:
Original patch by Marek Olšák
Change-Id: Ia97d5d685a63a377d86e82942436d1fe6e429bab
Reviewers: mareko, arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, jfb, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63452
llvm-svn: 364814
2019-07-02 01:17:45 +08:00
|
|
|
HighBitsOf32BitAddress(0),
|
|
|
|
GDSSize(0) {
|
2018-07-12 04:59:01 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
2017-12-16 06:22:58 +08:00
|
|
|
const Function &F = MF.getFunction();
|
|
|
|
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
|
|
|
|
WavesPerEU = ST.getWavesPerEU(F);
|
2015-11-26 04:55:12 +08:00
|
|
|
|
2019-07-31 09:07:10 +08:00
|
|
|
Occupancy = ST.computeOccupancy(MF, getLDSSize());
|
2018-07-20 17:05:08 +08:00
|
|
|
CallingConv::ID CC = F.getCallingConv();
|
2020-03-12 05:12:20 +08:00
|
|
|
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
|
|
|
|
|
|
|
// FIXME: Should have analysis or something rather than attribute to detect
|
|
|
|
// calls.
|
|
|
|
const bool HasCalls = FrameInfo.hasCalls() || F.hasFnAttribute("amdgpu-calls");
|
2018-07-20 17:05:08 +08:00
|
|
|
|
2020-03-12 04:13:52 +08:00
|
|
|
// Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
|
|
|
|
// have any calls.
|
|
|
|
const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
|
|
|
|
(!isEntryFunction() || HasCalls);
|
|
|
|
|
2018-07-20 17:05:08 +08:00
|
|
|
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
|
|
|
|
if (!F.arg_empty())
|
|
|
|
KernargSegmentPtr = true;
|
|
|
|
WorkGroupIDX = true;
|
|
|
|
WorkItemIDX = true;
|
|
|
|
} else if (CC == CallingConv::AMDGPU_PS) {
|
|
|
|
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
|
|
|
|
}
|
2018-05-31 13:36:04 +08:00
|
|
|
|
2017-05-18 05:56:25 +08:00
|
|
|
if (!isEntryFunction()) {
|
|
|
|
// Non-entry functions have no special inputs for now, other registers
|
|
|
|
// required for scratch access.
|
|
|
|
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
|
2019-07-09 03:03:38 +08:00
|
|
|
|
2019-07-09 03:05:19 +08:00
|
|
|
// TODO: Pick a high register, and shift down, similar to a kernel.
|
2020-03-04 09:39:47 +08:00
|
|
|
FrameOffsetReg = AMDGPU::SGPR33;
|
2017-06-27 01:53:59 +08:00
|
|
|
StackPtrOffsetReg = AMDGPU::SGPR32;
|
2017-07-19 00:44:56 +08:00
|
|
|
|
2017-08-04 07:00:29 +08:00
|
|
|
ArgInfo.PrivateSegmentBuffer =
|
|
|
|
ArgDescriptor::createRegister(ScratchRSrcReg);
|
|
|
|
|
2017-12-16 06:22:58 +08:00
|
|
|
if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
|
2017-07-28 23:52:08 +08:00
|
|
|
ImplicitArgPtr = true;
|
|
|
|
} else {
|
2018-05-30 03:35:00 +08:00
|
|
|
if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
|
2017-07-28 23:52:08 +08:00
|
|
|
KernargSegmentPtr = true;
|
2018-07-20 17:05:08 +08:00
|
|
|
MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
|
|
|
|
MaxKernArgAlign);
|
2018-05-30 03:35:00 +08:00
|
|
|
}
|
2017-05-18 05:56:25 +08:00
|
|
|
}
|
2016-01-13 19:45:36 +08:00
|
|
|
|
2020-03-12 04:13:52 +08:00
|
|
|
if (UseFixedABI) {
|
2017-07-18 06:35:50 +08:00
|
|
|
WorkGroupIDX = true;
|
2019-02-22 07:27:46 +08:00
|
|
|
WorkGroupIDY = true;
|
|
|
|
WorkGroupIDZ = true;
|
|
|
|
WorkItemIDX = true;
|
|
|
|
WorkItemIDY = true;
|
|
|
|
WorkItemIDZ = true;
|
2020-03-12 04:13:52 +08:00
|
|
|
ImplicitArgPtr = true;
|
|
|
|
} else {
|
|
|
|
if (F.hasFnAttribute("amdgpu-work-group-id-x"))
|
|
|
|
WorkGroupIDX = true;
|
|
|
|
|
|
|
|
if (F.hasFnAttribute("amdgpu-work-group-id-y"))
|
|
|
|
WorkGroupIDY = true;
|
|
|
|
|
|
|
|
if (F.hasFnAttribute("amdgpu-work-group-id-z"))
|
|
|
|
WorkGroupIDZ = true;
|
|
|
|
|
|
|
|
if (F.hasFnAttribute("amdgpu-work-item-id-x"))
|
|
|
|
WorkItemIDX = true;
|
|
|
|
|
|
|
|
if (F.hasFnAttribute("amdgpu-work-item-id-y"))
|
|
|
|
WorkItemIDY = true;
|
|
|
|
|
|
|
|
if (F.hasFnAttribute("amdgpu-work-item-id-z"))
|
|
|
|
WorkItemIDZ = true;
|
|
|
|
}
|
2015-12-01 05:16:03 +08:00
|
|
|
|
2017-07-19 00:44:56 +08:00
|
|
|
bool HasStackObjects = FrameInfo.hasStackObjects();
|
2015-12-01 05:16:03 +08:00
|
|
|
|
2017-07-18 06:35:50 +08:00
|
|
|
if (isEntryFunction()) {
|
|
|
|
// X, XY, and XYZ are the only supported combinations, so make sure Y is
|
|
|
|
// enabled if Z is.
|
|
|
|
if (WorkItemIDZ)
|
|
|
|
WorkItemIDY = true;
|
|
|
|
|
2018-11-01 02:54:06 +08:00
|
|
|
PrivateSegmentWaveByteOffset = true;
|
2015-12-01 05:16:03 +08:00
|
|
|
|
2017-08-04 07:00:29 +08:00
|
|
|
// HS and GS always have the scratch wave offset in SGPR5 on GFX9.
|
|
|
|
if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
|
|
|
|
(CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
|
2018-11-01 02:54:06 +08:00
|
|
|
ArgInfo.PrivateSegmentWaveByteOffset =
|
|
|
|
ArgDescriptor::createRegister(AMDGPU::SGPR5);
|
2017-05-05 06:25:20 +08:00
|
|
|
}
|
|
|
|
|
2018-10-05 05:02:16 +08:00
|
|
|
bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
|
|
|
|
if (isAmdHsaOrMesa) {
|
2018-11-01 02:54:06 +08:00
|
|
|
PrivateSegmentBuffer = true;
|
2015-12-01 05:16:03 +08:00
|
|
|
|
2020-03-12 04:13:52 +08:00
|
|
|
if (UseFixedABI) {
|
2015-12-01 05:16:03 +08:00
|
|
|
DispatchPtr = true;
|
2016-04-26 03:27:18 +08:00
|
|
|
QueuePtr = true;
|
2016-07-23 01:01:30 +08:00
|
|
|
|
2020-03-12 04:13:52 +08:00
|
|
|
// FIXME: We don't need this?
|
2016-07-23 01:01:30 +08:00
|
|
|
DispatchID = true;
|
2020-03-12 04:13:52 +08:00
|
|
|
} else {
|
|
|
|
if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
|
|
|
|
DispatchPtr = true;
|
|
|
|
|
|
|
|
if (F.hasFnAttribute("amdgpu-queue-ptr"))
|
|
|
|
QueuePtr = true;
|
|
|
|
|
|
|
|
if (F.hasFnAttribute("amdgpu-dispatch-id"))
|
|
|
|
DispatchID = true;
|
|
|
|
}
|
2018-05-30 01:42:50 +08:00
|
|
|
} else if (ST.isMesaGfxShader(F)) {
|
2018-11-01 02:54:06 +08:00
|
|
|
ImplicitBufferPtr = true;
|
2015-12-01 05:16:03 +08:00
|
|
|
}
|
|
|
|
|
2020-03-12 04:13:52 +08:00
|
|
|
if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
|
2017-07-14 08:11:13 +08:00
|
|
|
KernargSegmentPtr = true;
|
|
|
|
|
2018-10-05 05:02:16 +08:00
|
|
|
if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
|
[AMDGPU] Correct the setting of `FlatScratchInit`.
Summary: - That flag setting should skip spilling stack slot.
Reviewers: arsenm, rampitec
Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64143
llvm-svn: 365137
2019-07-04 21:29:45 +08:00
|
|
|
auto hasNonSpillStackObjects = [&]() {
|
|
|
|
// Avoid expensive checking if there's no stack objects.
|
|
|
|
if (!HasStackObjects)
|
|
|
|
return false;
|
|
|
|
for (auto OI = FrameInfo.getObjectIndexBegin(),
|
|
|
|
OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
|
|
|
|
if (!FrameInfo.isSpillSlotObjectIndex(OI))
|
|
|
|
return true;
|
|
|
|
// All stack objects are spill slots.
|
|
|
|
return false;
|
|
|
|
};
|
2017-07-19 00:44:58 +08:00
|
|
|
// TODO: This could be refined a lot. The attribute is a poor way of
|
|
|
|
// detecting calls that may require it before argument lowering.
|
2020-03-12 05:12:20 +08:00
|
|
|
if (HasCalls || hasNonSpillStackObjects())
|
2017-07-19 00:44:58 +08:00
|
|
|
FlatScratchInit = true;
|
|
|
|
}
|
2017-09-29 17:49:35 +08:00
|
|
|
|
2017-12-16 06:22:58 +08:00
|
|
|
Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
|
2017-09-29 17:49:35 +08:00
|
|
|
StringRef S = A.getValueAsString();
|
|
|
|
if (!S.empty())
|
|
|
|
S.consumeInteger(0, GITPtrHigh);
|
2018-02-10 00:57:57 +08:00
|
|
|
|
|
|
|
A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
|
|
|
|
S = A.getValueAsString();
|
|
|
|
if (!S.empty())
|
|
|
|
S.consumeInteger(0, HighBitsOf32BitAddress);
|
AMDGPU: Support GDS atomics
Summary:
Original patch by Marek Olšák
Change-Id: Ia97d5d685a63a377d86e82942436d1fe6e429bab
Reviewers: mareko, arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, jfb, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63452
llvm-svn: 364814
2019-07-02 01:17:45 +08:00
|
|
|
|
|
|
|
S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
|
|
|
|
if (!S.empty())
|
|
|
|
S.consumeInteger(0, GDSSize);
|
2015-12-01 05:16:03 +08:00
|
|
|
}
|
|
|
|
|
2018-05-31 13:36:04 +08:00
|
|
|
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
|
|
|
|
limitOccupancy(getMaxWavesPerEU());
|
2018-07-12 04:59:01 +08:00
|
|
|
const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
|
2018-05-31 13:36:04 +08:00
|
|
|
limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
|
|
|
|
MF.getFunction()));
|
|
|
|
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
|
2015-12-01 05:16:03 +08:00
|
|
|
const SIRegisterInfo &TRI) {
|
2017-08-04 07:00:29 +08:00
|
|
|
ArgInfo.PrivateSegmentBuffer =
|
|
|
|
ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
|
2019-10-10 15:11:33 +08:00
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
|
2015-12-01 05:16:03 +08:00
|
|
|
NumUserSGPRs += 4;
|
2017-08-04 07:00:29 +08:00
|
|
|
return ArgInfo.PrivateSegmentBuffer.getRegister();
|
2015-12-01 05:16:03 +08:00
|
|
|
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
|
2017-08-04 07:00:29 +08:00
|
|
|
ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
|
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
|
2015-12-01 05:16:03 +08:00
|
|
|
NumUserSGPRs += 2;
|
2017-08-04 07:00:29 +08:00
|
|
|
return ArgInfo.DispatchPtr.getRegister();
|
2015-12-01 05:16:03 +08:00
|
|
|
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
|
2017-08-04 07:00:29 +08:00
|
|
|
ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
|
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
|
2015-12-01 05:16:03 +08:00
|
|
|
NumUserSGPRs += 2;
|
2017-08-04 07:00:29 +08:00
|
|
|
return ArgInfo.QueuePtr.getRegister();
|
2015-11-26 04:55:12 +08:00
|
|
|
}
|
2014-08-22 04:40:54 +08:00
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
|
2017-08-04 07:00:29 +08:00
|
|
|
ArgInfo.KernargSegmentPtr
|
|
|
|
= ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
|
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
|
2015-12-01 05:16:03 +08:00
|
|
|
NumUserSGPRs += 2;
|
2017-08-04 07:00:29 +08:00
|
|
|
return ArgInfo.KernargSegmentPtr.getRegister();
|
2015-12-01 05:15:53 +08:00
|
|
|
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
|
2017-08-04 07:00:29 +08:00
|
|
|
ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
|
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
|
2016-07-23 01:01:30 +08:00
|
|
|
NumUserSGPRs += 2;
|
2017-08-04 07:00:29 +08:00
|
|
|
return ArgInfo.DispatchID.getRegister();
|
2016-07-23 01:01:30 +08:00
|
|
|
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
|
2017-08-04 07:00:29 +08:00
|
|
|
ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
|
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
|
2016-02-12 14:31:30 +08:00
|
|
|
NumUserSGPRs += 2;
|
2017-08-04 07:00:29 +08:00
|
|
|
return ArgInfo.FlatScratchInit.getRegister();
|
2016-02-12 14:31:30 +08:00
|
|
|
}
|
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
|
2017-08-04 07:00:29 +08:00
|
|
|
ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
|
|
|
|
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
|
2017-01-25 09:25:13 +08:00
|
|
|
NumUserSGPRs += 2;
|
2017-08-04 07:00:29 +08:00
|
|
|
return ArgInfo.ImplicitBufferPtr.getRegister();
|
2017-01-25 09:25:13 +08:00
|
|
|
}
|
|
|
|
|
2017-08-02 09:52:45 +08:00
|
|
|
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
|
|
|
|
for (unsigned I = 0; CSRegs[I]; ++I) {
|
|
|
|
if (CSRegs[I] == Reg)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-07-09 03:03:38 +08:00
|
|
|
/// \p returns true if \p NumLanes slots are available in VGPRs already used for
|
|
|
|
/// SGPR spilling.
|
|
|
|
//
|
|
|
|
// FIXME: This only works after processFunctionBeforeFrameFinalized
|
|
|
|
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
|
|
|
|
unsigned NumNeed) const {
|
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
unsigned WaveSize = ST.getWavefrontSize();
|
|
|
|
return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
|
|
|
|
}
|
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
|
|
|
|
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
|
|
|
|
int FI) {
|
|
|
|
std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
|
2016-01-04 23:50:01 +08:00
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
// This has already been allocated.
|
|
|
|
if (!SpillLanes.empty())
|
|
|
|
return true;
|
2016-01-04 23:50:01 +08:00
|
|
|
|
2018-07-12 04:59:01 +08:00
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
2017-02-22 03:12:08 +08:00
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
|
|
|
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
|
|
unsigned WaveSize = ST.getWavefrontSize();
|
|
|
|
|
|
|
|
unsigned Size = FrameInfo.getObjectSize(FI);
|
|
|
|
assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
|
|
|
|
assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
|
|
|
|
|
|
|
|
int NumLanes = Size / 4;
|
|
|
|
|
2019-06-26 21:39:29 +08:00
|
|
|
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
|
2017-08-02 09:52:45 +08:00
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
// Make sure to handle the case where a wide SGPR spill may span between two
|
|
|
|
// VGPRs.
|
|
|
|
for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
|
2020-03-12 07:22:30 +08:00
|
|
|
Register LaneVGPR;
|
2017-02-22 03:12:08 +08:00
|
|
|
unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
|
|
|
|
|
|
|
|
if (VGPRIndex == 0) {
|
|
|
|
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
|
|
|
|
if (LaneVGPR == AMDGPU::NoRegister) {
|
2017-09-11 16:31:32 +08:00
|
|
|
// We have no VGPRs left for spilling SGPRs. Reset because we will not
|
2017-02-22 03:12:08 +08:00
|
|
|
// partially spill the SGPR to VGPRs.
|
|
|
|
SGPRToVGPRSpills.erase(FI);
|
|
|
|
NumVGPRSpillLanes -= I;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-08-02 09:52:45 +08:00
|
|
|
Optional<int> CSRSpillFI;
|
2018-03-28 03:42:55 +08:00
|
|
|
if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
|
|
|
|
isCalleeSavedReg(CSRegs, LaneVGPR)) {
|
|
|
|
CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
|
2017-08-02 09:52:45 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
|
2017-02-22 03:12:08 +08:00
|
|
|
|
|
|
|
// Add this register as live-in to all blocks to avoid machine verifer
|
|
|
|
// complaining about use of an undefined physical register.
|
|
|
|
for (MachineBasicBlock &BB : MF)
|
|
|
|
BB.addLiveIn(LaneVGPR);
|
|
|
|
} else {
|
2017-08-02 09:52:45 +08:00
|
|
|
LaneVGPR = SpillVGPRs.back().VGPR;
|
2014-08-22 04:40:54 +08:00
|
|
|
}
|
2017-02-22 03:12:08 +08:00
|
|
|
|
|
|
|
SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
|
2014-08-22 04:40:54 +08:00
|
|
|
}
|
|
|
|
|
2017-02-22 03:12:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-07-12 05:54:13 +08:00
|
|
|
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
|
|
|
|
/// Either AGPR is spilled to VGPR to vice versa.
|
|
|
|
/// Returns true if a \p FI can be eliminated completely.
|
|
|
|
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
|
|
|
|
int FI,
|
|
|
|
bool isAGPRtoVGPR) {
|
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
|
|
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
|
|
|
|
assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
|
|
|
|
|
|
|
|
auto &Spill = VGPRToAGPRSpills[FI];
|
|
|
|
|
|
|
|
// This has already been allocated.
|
|
|
|
if (!Spill.Lanes.empty())
|
|
|
|
return Spill.FullyAllocated;
|
|
|
|
|
|
|
|
unsigned Size = FrameInfo.getObjectSize(FI);
|
|
|
|
unsigned NumLanes = Size / 4;
|
|
|
|
Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
|
|
|
|
|
|
|
|
const TargetRegisterClass &RC =
|
|
|
|
isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
|
|
|
|
auto Regs = RC.getRegisters();
|
|
|
|
|
|
|
|
auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
|
|
|
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
|
|
|
Spill.FullyAllocated = true;
|
|
|
|
|
|
|
|
// FIXME: Move allocation logic out of MachineFunctionInfo and initialize
|
|
|
|
// once.
|
|
|
|
BitVector OtherUsedRegs;
|
|
|
|
OtherUsedRegs.resize(TRI->getNumRegs());
|
|
|
|
|
|
|
|
const uint32_t *CSRMask =
|
|
|
|
TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
|
|
|
|
if (CSRMask)
|
|
|
|
OtherUsedRegs.setBitsInMask(CSRMask);
|
|
|
|
|
|
|
|
// TODO: Should include register tuples, but doesn't matter with current
|
|
|
|
// usage.
|
|
|
|
for (MCPhysReg Reg : SpillAGPR)
|
|
|
|
OtherUsedRegs.set(Reg);
|
|
|
|
for (MCPhysReg Reg : SpillVGPR)
|
|
|
|
OtherUsedRegs.set(Reg);
|
|
|
|
|
|
|
|
SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
|
|
|
|
for (unsigned I = 0; I < NumLanes; ++I) {
|
|
|
|
NextSpillReg = std::find_if(
|
|
|
|
NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
|
|
|
|
return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
|
|
|
|
!OtherUsedRegs[Reg];
|
|
|
|
});
|
|
|
|
|
|
|
|
if (NextSpillReg == Regs.end()) { // Registers exhausted
|
|
|
|
Spill.FullyAllocated = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
OtherUsedRegs.set(*NextSpillReg);
|
|
|
|
SpillRegs.push_back(*NextSpillReg);
|
|
|
|
Spill.Lanes[I] = *NextSpillReg++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return Spill.FullyAllocated;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
|
2019-07-09 03:03:38 +08:00
|
|
|
// The FP spill hasn't been inserted yet, so keep it around.
|
|
|
|
for (auto &R : SGPRToVGPRSpills) {
|
|
|
|
if (R.first != FramePointerSaveIndex)
|
|
|
|
MFI.RemoveStackObject(R.first);
|
|
|
|
}
|
|
|
|
|
|
|
|
// All other SPGRs must be allocated on the default stack, so reset the stack
|
|
|
|
// ID.
|
|
|
|
for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
|
|
|
|
++i)
|
|
|
|
if (i != FramePointerSaveIndex)
|
|
|
|
MFI.setStackID(i, TargetStackID::Default);
|
2019-07-12 05:54:13 +08:00
|
|
|
|
|
|
|
for (auto &R : VGPRToAGPRSpills) {
|
|
|
|
if (R.second.FullyAllocated)
|
|
|
|
MFI.RemoveStackObject(R.first);
|
|
|
|
}
|
2013-11-28 05:23:35 +08:00
|
|
|
}
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
|
|
|
|
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
|
|
|
|
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
|
|
|
|
return AMDGPU::SGPR0 + NumUserSGPRs;
|
|
|
|
}
|
|
|
|
|
|
|
|
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
|
|
|
|
return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
|
|
|
|
}
|
2019-03-15 06:54:43 +08:00
|
|
|
|
2020-03-12 07:22:30 +08:00
|
|
|
static yaml::StringValue regToString(Register Reg,
|
2019-03-15 06:54:43 +08:00
|
|
|
const TargetRegisterInfo &TRI) {
|
|
|
|
yaml::StringValue Dest;
|
2019-03-19 03:00:46 +08:00
|
|
|
{
|
|
|
|
raw_string_ostream OS(Dest.Value);
|
|
|
|
OS << printReg(Reg, &TRI);
|
|
|
|
}
|
2019-03-15 06:54:43 +08:00
|
|
|
return Dest;
|
|
|
|
}
|
|
|
|
|
[AMDGPU] Enable serializing of argument info.
Summary:
- Support serialization of all arguments in machine function info. This
enables fabricating MIR tests depending on argument info.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64096
llvm-svn: 364995
2019-07-03 10:00:21 +08:00
|
|
|
static Optional<yaml::SIArgumentInfo>
|
|
|
|
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
|
|
|
|
const TargetRegisterInfo &TRI) {
|
|
|
|
yaml::SIArgumentInfo AI;
|
|
|
|
|
|
|
|
auto convertArg = [&](Optional<yaml::SIArgument> &A,
|
|
|
|
const ArgDescriptor &Arg) {
|
|
|
|
if (!Arg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Create a register or stack argument.
|
|
|
|
yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
|
|
|
|
if (Arg.isRegister()) {
|
|
|
|
raw_string_ostream OS(SA.RegisterName.Value);
|
|
|
|
OS << printReg(Arg.getRegister(), &TRI);
|
|
|
|
} else
|
|
|
|
SA.StackOffset = Arg.getStackOffset();
|
|
|
|
// Check and update the optional mask.
|
|
|
|
if (Arg.isMasked())
|
|
|
|
SA.Mask = Arg.getMask();
|
|
|
|
|
|
|
|
A = SA;
|
|
|
|
return true;
|
|
|
|
};
|
|
|
|
|
|
|
|
bool Any = false;
|
|
|
|
Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
|
|
|
|
Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
|
|
|
|
Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
|
|
|
|
Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
|
|
|
|
Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
|
|
|
|
Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
|
|
|
|
Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
|
|
|
|
Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
|
|
|
|
Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
|
|
|
|
Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
|
|
|
|
Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
|
|
|
|
Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
|
|
|
|
ArgInfo.PrivateSegmentWaveByteOffset);
|
|
|
|
Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
|
|
|
|
Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
|
|
|
|
Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
|
|
|
|
Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
|
|
|
|
Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
|
|
|
|
|
|
|
|
if (Any)
|
|
|
|
return AI;
|
|
|
|
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
2019-03-15 06:54:43 +08:00
|
|
|
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
|
|
|
|
const llvm::SIMachineFunctionInfo& MFI,
|
|
|
|
const TargetRegisterInfo &TRI)
|
|
|
|
: ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
|
|
|
|
MaxKernArgAlign(MFI.getMaxKernArgAlign()),
|
|
|
|
LDSSize(MFI.getLDSSize()),
|
|
|
|
IsEntryFunction(MFI.isEntryFunction()),
|
|
|
|
NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
|
|
|
|
MemoryBound(MFI.isMemoryBound()),
|
|
|
|
WaveLimiter(MFI.needsWaveLimiter()),
|
2019-08-28 02:18:38 +08:00
|
|
|
HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
|
2019-03-15 06:54:43 +08:00
|
|
|
ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
|
|
|
|
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
|
[AMDGPU] Enable serializing of argument info.
Summary:
- Support serialization of all arguments in machine function info. This
enables fabricating MIR tests depending on argument info.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64096
llvm-svn: 364995
2019-07-03 10:00:21 +08:00
|
|
|
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
|
2019-07-11 00:09:26 +08:00
|
|
|
ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
|
|
|
|
Mode(MFI.getMode()) {}
|
2019-03-15 06:54:43 +08:00
|
|
|
|
|
|
|
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
|
|
|
|
MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool SIMachineFunctionInfo::initializeBaseYamlFields(
|
|
|
|
const yaml::SIMachineFunctionInfo &YamlMFI) {
|
|
|
|
ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
|
2019-10-15 20:56:24 +08:00
|
|
|
MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
|
2019-03-15 06:54:43 +08:00
|
|
|
LDSSize = YamlMFI.LDSSize;
|
2019-08-28 02:18:38 +08:00
|
|
|
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
|
2019-03-15 06:54:43 +08:00
|
|
|
IsEntryFunction = YamlMFI.IsEntryFunction;
|
|
|
|
NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
|
|
|
|
MemoryBound = YamlMFI.MemoryBound;
|
|
|
|
WaveLimiter = YamlMFI.WaveLimiter;
|
|
|
|
return false;
|
|
|
|
}
|