forked from OSchip/llvm-project
235 lines
8.8 KiB
C++
235 lines
8.8 KiB
C++
//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the Localizer class.
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/CodeGen/GlobalISel/Localizer.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/InitializePasses.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#define DEBUG_TYPE "localizer"
|
|
|
|
using namespace llvm;
|
|
|
|
char Localizer::ID = 0;
|
|
INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
|
|
"Move/duplicate certain instructions close to their use",
|
|
false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
|
INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
|
|
"Move/duplicate certain instructions close to their use",
|
|
false, false)
|
|
|
|
/// Build a Localizer that consults \p F before doing any work.
///
/// \param F Predicate stored in DoNotRunPass; runOnMachineFunction() returns
///          early without changes whenever it yields true for the function.
Localizer::Localizer(std::function<bool(const MachineFunction &)> F)
    : MachineFunctionPass(ID), DoNotRunPass(F) {}
|
|
|
|
/// Default construction: delegate to the predicate-taking constructor with a
/// predicate that never asks for the pass to be skipped.
Localizer::Localizer()
    : Localizer([](const MachineFunction &) { return false; }) {}
|
|
|
|
/// Cache the per-function state used by the rest of the pass: the register
/// info of \p MF and the TargetTransformInfo for its IR function.
void Localizer::init(MachineFunction &MF) {
  auto &TTIWrapper = getAnalysis<TargetTransformInfoWrapperPass>();
  MRI = &MF.getRegInfo();
  TTI = &TTIWrapper.getTTI(MF.getFunction());
}
|
|
|
|
bool Localizer::shouldLocalize(const MachineInstr &MI) {
|
|
// Assuming a spill and reload of a value has a cost of 1 instruction each,
|
|
// this helper function computes the maximum number of uses we should consider
|
|
// for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
|
|
// break even in terms of code size when the original MI has 2 users vs
|
|
// choosing to potentially spill. Any more than 2 users we we have a net code
|
|
// size increase. This doesn't take into account register pressure though.
|
|
auto maxUses = [](unsigned RematCost) {
|
|
// A cost of 1 means remats are basically free.
|
|
if (RematCost == 1)
|
|
return UINT_MAX;
|
|
if (RematCost == 2)
|
|
return 2U;
|
|
|
|
// Remat is too expensive, only sink if there's one user.
|
|
if (RematCost > 2)
|
|
return 1U;
|
|
llvm_unreachable("Unexpected remat cost");
|
|
};
|
|
|
|
// Helper to walk through uses and terminate if we've reached a limit. Saves
|
|
// us spending time traversing uses if all we want to know is if it's >= min.
|
|
auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
|
|
unsigned NumUses = 0;
|
|
auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
|
|
for (; UI != UE && NumUses < MaxUses; ++UI) {
|
|
NumUses++;
|
|
}
|
|
// If we haven't reached the end yet then there are more than MaxUses users.
|
|
return UI == UE;
|
|
};
|
|
|
|
switch (MI.getOpcode()) {
|
|
default:
|
|
return false;
|
|
// Constants-like instructions should be close to their users.
|
|
// We don't want long live-ranges for them.
|
|
case TargetOpcode::G_CONSTANT:
|
|
case TargetOpcode::G_FCONSTANT:
|
|
case TargetOpcode::G_FRAME_INDEX:
|
|
case TargetOpcode::G_INTTOPTR:
|
|
return true;
|
|
case TargetOpcode::G_GLOBAL_VALUE: {
|
|
unsigned RematCost = TTI->getGISelRematGlobalCost();
|
|
Register Reg = MI.getOperand(0).getReg();
|
|
unsigned MaxUses = maxUses(RematCost);
|
|
if (MaxUses == UINT_MAX)
|
|
return true; // Remats are "free" so always localize.
|
|
bool B = isUsesAtMost(Reg, MaxUses);
|
|
return B;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Declare the analyses this pass relies on.
///
/// TargetTransformInfoWrapperPass is required because init() fetches it with
/// getAnalysis<>(). The remaining requirements come from
/// getSelectionDAGFallbackAnalysisUsage() and the MachineFunctionPass base.
void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetTransformInfoWrapperPass>();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
/// Determine whether the use \p MOUse lives in the same block as \p Def.
///
/// \param MOUse     The use operand being examined.
/// \param Def       The defining instruction.
/// \param InsertMBB Output: the block in which a localized copy of \p Def
///                  would have to be placed to serve this use. For a PHI user
///                  this is the block operand that follows \p MOUse in the
///                  PHI's operand list; otherwise it is the user's own block.
/// \returns true if \p InsertMBB is the block that contains \p Def.
bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
                           MachineBasicBlock *&InsertMBB) {
  MachineInstr &User = *MOUse.getParent();
  if (User.isPHI()) {
    // The operand right after the value operand names the associated block.
    const unsigned BlockOpIdx = User.getOperandNo(&MOUse) + 1;
    InsertMBB = User.getOperand(BlockOpIdx).getMBB();
  } else {
    InsertMBB = User.getParent();
  }
  return Def.getParent() == InsertMBB;
}
|
|
|
|
/// Give every non-local user of a localizable entry-block instruction its own
/// copy of that instruction in the block where the use needs it.
///
/// \param MF              The function being processed.
/// \param LocalizedInstrs Receives every clone created here.
/// \returns true if at least one use was rewritten.
bool Localizer::localizeInterBlock(MachineFunction &MF,
                                   LocalizedSetVecT &LocalizedInstrs) {
  bool Changed = false;
  // Maps (block, original vreg) to the vreg defined by the clone already
  // placed in that block, so each block gets at most one clone per register.
  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;

  // The IRTranslator only emits constants into the entry block, and the rest
  // of the GISel pipeline generally emits constants close to their users, so
  // only the entry block is processed here. This might change if CSE across
  // blocks is introduced.
  auto &EntryMBB = MF.front();
  for (auto InstIt = EntryMBB.rbegin(), InstEnd = EntryMBB.rend();
       InstIt != InstEnd; ++InstIt) {
    MachineInstr &MI = *InstIt;
    if (!shouldLocalize(MI))
      continue;
    LLVM_DEBUG(dbgs() << "Should localize: " << MI);
    assert(MI.getDesc().getNumDefs() == 1 &&
           "More than one definition not supported yet");
    Register Reg = MI.getOperand(0).getReg();

    // Visit every use of Reg. Rewriting a use changes the use list, so the
    // iterator is advanced before the current operand is touched; a range-based
    // loop cannot be used here.
    auto UseIt = MRI->use_begin(Reg);
    auto UseEnd = MRI->use_end();
    while (UseIt != UseEnd) {
      MachineOperand &MOUse = *UseIt;
      ++UseIt;

      MachineBasicBlock *InsertMBB;
      LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
                 dbgs() << "Checking use: " << MIUse
                        << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
      // Uses that are already local need no work.
      if (isLocalUse(MOUse, MI, InsertMBB))
        continue;

      LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
      Changed = true;
      auto MBBAndReg = std::make_pair(InsertMBB, Reg);
      auto LocalDefIt = MBBWithLocalDef.find(MBBAndReg);
      if (LocalDefIt == MBBWithLocalDef.end()) {
        // No clone in this block yet: create one.
        MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
        LocalizedInstrs.insert(LocalizedMI);

        // A single non-PHI user gets the clone placed at the user itself
        // (past any PHIs/labels); otherwise it goes to the top of the block,
        // after PHIs and labels.
        MachineInstr &UseMI = *MOUse.getParent();
        MachineBasicBlock::iterator InsertPt =
            (MRI->hasOneUse(Reg) && !UseMI.isPHI())
                ? InsertMBB->SkipPHIsAndLabels(UseMI)
                : InsertMBB->SkipPHIsAndLabels(InsertMBB->begin());
        InsertMBB->insert(InsertPt, LocalizedMI);

        // Give the clone a fresh register with the same type and the same
        // register class / bank as the original.
        Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
        MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
        LocalizedMI->getOperand(0).setReg(NewReg);
        LocalDefIt =
            MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
        LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
      }
      LLVM_DEBUG(dbgs() << "Update use with: " << printReg(LocalDefIt->second)
                        << '\n');
      // Point the use at the block-local definition.
      MOUse.setReg(LocalDefIt->second);
    }
  }
  return Changed;
}
|
|
|
|
/// Sink each previously localized instruction down its block so that it sits
/// immediately before its first non-PHI user.
///
/// \param LocalizedInstrs Instructions to reposition within their parent
///                        block.
/// \returns true if any instruction was repositioned.
bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
  bool Changed = false;

  // For each already-localized instruction, scan the block top down from the
  // instruction's current position until one of its users is found.

  // FIXME: Consider doing inst duplication if live ranges are very long due to
  // many users, but this case may be better served by regalloc improvements.

  for (MachineInstr *MI : LocalizedInstrs) {
    Register Reg = MI->getOperand(0).getReg();
    MachineBasicBlock &MBB = *MI->getParent();
    // All of the non-PHI, non-debug user instructions of this register.
    SmallPtrSet<MachineInstr *, 32> Users;
    for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
      if (!UseMI.isPHI())
        Users.insert(&UseMI);
    }
    // If all the users were PHIs then they're not going to be in our block,
    // don't try to move this instruction.
    if (Users.empty())
      continue;

    // Walk forward from the instruction after MI to the first user.
    MachineBasicBlock::iterator II(MI);
    ++II;
    while (II != MBB.end() && !Users.count(&*II))
      ++II;

    // Validate the iterator before it is dereferenced by the debug output
    // below; printing first would dereference end() when no user is found.
    assert(II != MBB.end() && "Didn't find the user in the MBB");
    LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II
                      << "\n");
    MI->removeFromParent();
    MBB.insert(II, MI);
    Changed = true;
  }
  return Changed;
}
|
|
|
|
/// Pass entry point: run inter-block localization and then intra-block
/// localization on \p MF.
///
/// \returns true if either phase changed the function; false if the pass was
///          skipped or nothing changed.
bool Localizer::runOnMachineFunction(MachineFunction &MF) {
  // Bail out if the ISel pipeline already failed, or if the target asked for
  // this pass not to run. The predicate is only consulted when ISel succeeded.
  const bool ISelFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  if (ISelFailed || DoNotRunPass(MF))
    return false;

  LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');

  init(MF);

  // Clones created by the inter-block phase are collected here so the
  // intra-block phase can shorten their live ranges further.
  LocalizedSetVecT LocalizedInstrs;

  const bool InterChanged = localizeInterBlock(MF, LocalizedInstrs);
  const bool IntraChanged = localizeIntraBlock(LocalizedInstrs);
  return InterChanged || IntraChanged;
}
|