//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single
/// pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///   %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to run after the register coalescer, so the coalescer can do its
/// job, and before actual register allocation, so that the combined value can
/// be rematerialized.
///
/// Right now the pass only handles 64-bit SGPRs with immediate initializers,
/// although the same should be possible for other register classes and
/// instructions if necessary.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
|
|
#include "GCNSubtarget.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "llvm/CodeGen/LiveIntervals.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/InitializePasses.h"
|
|
|
|
using namespace llvm;

#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"

namespace {

class GCNPreRAOptimizations : public MachineFunctionPass {
private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

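  // Processes a single virtual register: either combines split 64-bit SGPR
  // immediate initializers into one S_MOV_B64_IMM_PSEUDO, or forwards the
  // VGPR source of a defining accvgpr_write past an AGPR-to-AGPR copy.
  // Returns true if any instruction was changed.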
  bool processReg(Register Reg);

public:
  static char ID;

  GCNPreRAOptimizations() : MachineFunctionPass(ID) {
    initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Pre-RA optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
                      "AMDGPU Pre-RA optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE,
                    "AMDGPU Pre-RA optimizations", false, false)

char GCNPreRAOptimizations::ID = 0;

char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;

FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
  return new GCNPreRAOptimizations();
}

bool GCNPreRAOptimizations::processReg(Register Reg) {
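  // Def0/Def1 record the S_MOV_B32 defs of sub0/sub1; Init accumulates the
  // combined 64-bit immediate they produce.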
  MachineInstr *Def0 = nullptr;
  MachineInstr *Def1 = nullptr;
  uint64_t Init = 0;
  bool Changed = false;
  SmallSet<Register, 32> ModifiedRegs;
  bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));

  for (MachineInstr &I : MRI->def_instructions(Reg)) {
    switch (I.getOpcode()) {
    default:
      return false;
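    // A direct accvgpr_write def is already in the desired form; nothing to
    // rewrite.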
    case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
      break;
    case AMDGPU::COPY: {
      // Some subtargets cannot do an AGPR-to-AGPR copy directly and need an
      // intermediate temporary VGPR register. Try to find the defining
      // accvgpr_write to avoid the temporary register.

      if (!IsAGPRDst)
        return false;

      Register SrcReg = I.getOperand(1).getReg();

      if (!SrcReg.isVirtual())
        break;

      // Check if the source of the copy is another AGPR.
      bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
      if (!IsAGPRSrc)
        break;

      // def_instructions() does not look at subregs, so it may return an
      // instruction that defines the same vreg but a different subreg; the
      // subreg has to be checked manually.
      Register SrcSubReg = I.getOperand(1).getSubReg();
      for (auto &Def : MRI->def_instructions(SrcReg)) {
        if (SrcSubReg != Def.getOperand(0).getSubReg())
          continue;

        if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
          MachineOperand DefSrcMO = Def.getOperand(1);

          // Immediates are not an issue and can be propagated in the
          // postrapseudos pass. Only handle the case where the source of the
          // defining accvgpr_write is a vreg.
          if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
            // Propagate the source reg of the accvgpr_write to this copy.
            I.getOperand(1).setReg(DefSrcMO.getReg());
            I.getOperand(1).setSubReg(DefSrcMO.getSubReg());

            // Reg uses were changed; collect the unique set of registers so
            // that live intervals can be updated at the end.
            ModifiedRegs.insert(DefSrcMO.getReg());
            ModifiedRegs.insert(SrcReg);

            Changed = true;
          }

          // Found the defining accvgpr_write, stop looking any further.
          break;
        }
      }
      break;
    }
    case AMDGPU::S_MOV_B32:
      if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
          I.getNumOperands() != 2)
        return false;

      switch (I.getOperand(0).getSubReg()) {
      default:
        return false;
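      // sub0 supplies the low 32 bits of the combined immediate.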
      case AMDGPU::sub0:
        if (Def0)
          return false;
        Def0 = &I;
        Init |= I.getOperand(1).getImm() & 0xffffffff;
        break;
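      // sub1 supplies the high 32 bits.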
      case AMDGPU::sub1:
        if (Def1)
          return false;
        Def1 = &I;
        Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
        break;
      }
      break;
    }
  }

  // For an AGPR reg, check if live intervals need to be updated.
  if (IsAGPRDst) {
    if (Changed) {
      for (Register RegToUpdate : ModifiedRegs) {
        LIS->removeInterval(RegToUpdate);
        LIS->createAndComputeVirtRegInterval(RegToUpdate);
      }
    }

    return Changed;
  }

  // For an SGPR reg, check if the two 32-bit moves can be combined.
  if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
    return Changed;

  LLVM_DEBUG(dbgs() << "Combining:\n  " << *Def0 << "  " << *Def1
                    << "  =>\n");

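  // Make Def0 the earlier of the two defs; the combined move is inserted at
  // its position.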
  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
                                LIS->getInstructionIndex(*Def0)))
    std::swap(Def0, Def1);

  LIS->RemoveMachineInstrFromMaps(*Def0);
  LIS->RemoveMachineInstrFromMaps(*Def1);
  auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
                      TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
                  .addImm(Init);

  Def0->eraseFromParent();
  Def1->eraseFromParent();
  LIS->InsertMachineInstrInMaps(*NewI);
  LIS->removeInterval(Reg);
  LIS->createAndComputeVirtRegInterval(Reg);

  LLVM_DEBUG(dbgs() << "  " << *NewI);

  return true;
}

bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();
  TRI = ST.getRegisterInfo();

  bool Changed = false;

  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!LIS->hasInterval(Reg))
      continue;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
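    // Candidates are 64-bit SGPR tuples, plus AGPR classes on subtargets
    // without gfx90a instructions, where an AGPR-to-AGPR copy needs an
    // intermediate VGPR (see processReg).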
    if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
        (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
      continue;

    Changed |= processReg(Reg);
  }

  return Changed;
}