forked from OSchip/llvm-project
[ModuloSchedule] Introduce PeelingModuloScheduleExpander
This is the beginnings of a reimplementation of ModuloScheduleExpander. It works by generating a single-block correct pipelined kernel and then peeling out the prolog and epilogs. This patch implements kernel generation as well as a validator that will confirm the number of phis added is the same as the ModuloScheduleExpander. Prolog and epilog peeling will come in a different patch. Differential Revision: https://reviews.llvm.org/D67081 llvm-svn: 370893
This commit is contained in:
parent
92e13f2eab
commit
fef9f59055
|
@ -140,6 +140,11 @@ public:
|
||||||
|
|
||||||
/// Return the rescheduled instructions in order.
|
/// Return the rescheduled instructions in order.
|
||||||
ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
|
ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
|
||||||
|
|
||||||
|
void dump() {
|
||||||
|
print(dbgs());
|
||||||
|
}
|
||||||
|
void print(raw_ostream &OS);
|
||||||
};
|
};
|
||||||
|
|
||||||
/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
|
/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
|
||||||
|
@ -162,6 +167,7 @@ private:
|
||||||
|
|
||||||
MachineBasicBlock *BB;
|
MachineBasicBlock *BB;
|
||||||
MachineBasicBlock *Preheader;
|
MachineBasicBlock *Preheader;
|
||||||
|
MachineBasicBlock *NewKernel = nullptr;
|
||||||
|
|
||||||
/// Map for each register and the max difference between its uses and def.
|
/// Map for each register and the max difference between its uses and def.
|
||||||
/// The first element in the pair is the max difference in stages. The
|
/// The first element in the pair is the max difference in stages. The
|
||||||
|
@ -252,6 +258,38 @@ public:
|
||||||
|
|
||||||
/// Performs the actual expansion.
|
/// Performs the actual expansion.
|
||||||
void expand();
|
void expand();
|
||||||
|
/// Performs final cleanup after expansion.
|
||||||
|
void cleanup();
|
||||||
|
|
||||||
|
/// Returns the newly rewritten kernel block, or nullptr if this was
|
||||||
|
/// optimized away.
|
||||||
|
MachineBasicBlock *getRewrittenKernel() { return NewKernel; }
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A reimplementation of ModuloScheduleExpander. It works by generating a
|
||||||
|
/// standalone kernel loop and peeling out the prologs and epilogs.
|
||||||
|
///
|
||||||
|
/// FIXME: This implementation cannot yet generate valid code. It can generate
|
||||||
|
/// a correct kernel but cannot peel out prologs and epilogs.
|
||||||
|
class PeelingModuloScheduleExpander {
|
||||||
|
ModuloSchedule &Schedule;
|
||||||
|
MachineFunction &MF;
|
||||||
|
const TargetSubtargetInfo &ST;
|
||||||
|
MachineRegisterInfo &MRI;
|
||||||
|
const TargetInstrInfo *TII;
|
||||||
|
LiveIntervals *LIS;
|
||||||
|
|
||||||
|
MachineBasicBlock *BB;
|
||||||
|
MachineBasicBlock *Preheader;
|
||||||
|
public:
|
||||||
|
PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
|
||||||
|
LiveIntervals *LIS)
|
||||||
|
: Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
|
||||||
|
TII(ST.getInstrInfo()), LIS(LIS) {}
|
||||||
|
|
||||||
|
/// Runs ModuloScheduleExpander and treats it as a golden input to validate
|
||||||
|
/// aspects of the code generated by PeelingModuloScheduleExpander.
|
||||||
|
void validateAgainstModuloScheduleExpander();
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Expander that simply annotates each scheduled instruction with a post-instr
|
/// Expander that simply annotates each scheduled instruction with a post-instr
|
||||||
|
|
|
@ -160,6 +160,11 @@ static cl::opt<bool> EmitTestAnnotations(
|
||||||
"with the generated schedule for feeding into the "
|
"with the generated schedule for feeding into the "
|
||||||
"-modulo-schedule-test pass"));
|
"-modulo-schedule-test pass"));
|
||||||
|
|
||||||
|
static cl::opt<bool> ExperimentalCodeGen(
|
||||||
|
"pipeliner-experimental-cg", cl::Hidden, cl::init(false),
|
||||||
|
cl::desc(
|
||||||
|
"Use the experimental peeling code generator for software pipelining"));
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
|
||||||
// A command line option to enable the CopyToPhi DAG mutation.
|
// A command line option to enable the CopyToPhi DAG mutation.
|
||||||
|
@ -549,8 +554,18 @@ void SwingSchedulerDAG::schedule() {
|
||||||
MSTI.annotate();
|
MSTI.annotate();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
|
// The experimental code generator can't work if there are InstChanges.
|
||||||
MSE.expand();
|
if (ExperimentalCodeGen && NewInstrChanges.empty()) {
|
||||||
|
PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
|
||||||
|
// Experimental code generation isn't complete yet, but it can partially
|
||||||
|
// validate the code it generates against the original
|
||||||
|
// ModuloScheduleExpander.
|
||||||
|
MSE.validateAgainstModuloScheduleExpander();
|
||||||
|
} else {
|
||||||
|
ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
|
||||||
|
MSE.expand();
|
||||||
|
MSE.cleanup();
|
||||||
|
}
|
||||||
++NumPipelined;
|
++NumPipelined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,17 +7,24 @@
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "llvm/CodeGen/ModuloSchedule.h"
|
#include "llvm/CodeGen/ModuloSchedule.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
#include "llvm/ADT/StringExtras.h"
|
#include "llvm/ADT/StringExtras.h"
|
||||||
#include "llvm/CodeGen/LiveIntervals.h"
|
#include "llvm/CodeGen/LiveIntervals.h"
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||||
#include "llvm/MC/MCContext.h"
|
#include "llvm/MC/MCContext.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
#define DEBUG_TYPE "pipeliner"
|
#define DEBUG_TYPE "pipeliner"
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
void ModuloSchedule::print(raw_ostream &OS) {
|
||||||
|
for (MachineInstr *MI : ScheduledInstrs)
|
||||||
|
OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// ModuloScheduleExpander implementation
|
// ModuloScheduleExpander implementation
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -139,6 +146,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
|
||||||
InstrMap[NewMI] = &*I;
|
InstrMap[NewMI] = &*I;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NewKernel = KernelBB;
|
||||||
KernelBB->transferSuccessors(BB);
|
KernelBB->transferSuccessors(BB);
|
||||||
KernelBB->replaceSuccessor(BB, KernelBB);
|
KernelBB->replaceSuccessor(BB, KernelBB);
|
||||||
|
|
||||||
|
@ -163,13 +171,15 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
|
||||||
// Add branches between prolog and epilog blocks.
|
// Add branches between prolog and epilog blocks.
|
||||||
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
|
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
|
||||||
|
|
||||||
|
delete[] VRMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ModuloScheduleExpander::cleanup() {
|
||||||
// Remove the original loop since it's no longer referenced.
|
// Remove the original loop since it's no longer referenced.
|
||||||
for (auto &I : *BB)
|
for (auto &I : *BB)
|
||||||
LIS.RemoveMachineInstrFromMaps(I);
|
LIS.RemoveMachineInstrFromMaps(I);
|
||||||
BB->clear();
|
BB->clear();
|
||||||
BB->eraseFromParent();
|
BB->eraseFromParent();
|
||||||
|
|
||||||
delete[] VRMap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate the pipeline prolog code.
|
/// Generate the pipeline prolog code.
|
||||||
|
@ -886,6 +896,8 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
|
||||||
}
|
}
|
||||||
LastPro->clear();
|
LastPro->clear();
|
||||||
LastPro->eraseFromParent();
|
LastPro->eraseFromParent();
|
||||||
|
if (LastPro == KernelBB)
|
||||||
|
NewKernel = nullptr;
|
||||||
} else {
|
} else {
|
||||||
numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
|
numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
|
||||||
removePhis(Epilog, Prolog);
|
removePhis(Epilog, Prolog);
|
||||||
|
@ -1196,6 +1208,449 @@ bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
|
||||||
return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
|
return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// PeelingModuloScheduleExpander implementation
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// This is a reimplementation of ModuloScheduleExpander that works by creating
|
||||||
|
// a fully correct steady-state kernel and peeling off the prolog and epilogs.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// Remove any dead phis in MBB. Dead phis either have only one block as input
|
||||||
|
// (in which case they are the identity) or have no uses.
|
||||||
|
void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
|
||||||
|
LiveIntervals *LIS) {
|
||||||
|
bool Changed = true;
|
||||||
|
while (Changed) {
|
||||||
|
Changed = false;
|
||||||
|
for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
|
||||||
|
MachineInstr &MI = *I++;
|
||||||
|
assert(MI.isPHI());
|
||||||
|
if (MRI.use_empty(MI.getOperand(0).getReg())) {
|
||||||
|
if (LIS)
|
||||||
|
LIS->RemoveMachineInstrFromMaps(MI);
|
||||||
|
MI.eraseFromParent();
|
||||||
|
Changed = true;
|
||||||
|
} else if (MI.getNumExplicitOperands() == 3) {
|
||||||
|
MRI.constrainRegClass(MI.getOperand(1).getReg(),
|
||||||
|
MRI.getRegClass(MI.getOperand(0).getReg()));
|
||||||
|
MRI.replaceRegWith(MI.getOperand(0).getReg(),
|
||||||
|
MI.getOperand(1).getReg());
|
||||||
|
if (LIS)
|
||||||
|
LIS->RemoveMachineInstrFromMaps(MI);
|
||||||
|
MI.eraseFromParent();
|
||||||
|
Changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Rewrites the kernel block in-place to adhere to the given schedule.
|
||||||
|
/// KernelRewriter holds all of the state required to perform the rewriting.
|
||||||
|
class KernelRewriter {
|
||||||
|
ModuloSchedule &S;
|
||||||
|
MachineBasicBlock *BB;
|
||||||
|
MachineBasicBlock *PreheaderBB, *ExitBB;
|
||||||
|
MachineRegisterInfo &MRI;
|
||||||
|
const TargetInstrInfo *TII;
|
||||||
|
LiveIntervals *LIS;
|
||||||
|
|
||||||
|
// Map from register class to canonical undef register for that class.
|
||||||
|
DenseMap<const TargetRegisterClass *, Register> Undefs;
|
||||||
|
// Map from <LoopReg, InitReg> to phi register for all created phis. Note that
|
||||||
|
// this map is only used when InitReg is non-undef.
|
||||||
|
DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
|
||||||
|
// Map from LoopReg to phi register where the InitReg is undef.
|
||||||
|
DenseMap<Register, Register> UndefPhis;
|
||||||
|
|
||||||
|
// Reg is used by MI. Return the new register MI should use to adhere to the
|
||||||
|
// schedule. Insert phis as necessary.
|
||||||
|
Register remapUse(Register Reg, MachineInstr &MI);
|
||||||
|
// Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
|
||||||
|
// If InitReg is not given it is chosen arbitrarily. It will either be undef
|
||||||
|
// or will be chosen so as to share another phi.
|
||||||
|
Register phi(Register LoopReg, Optional<Register> InitReg = {},
|
||||||
|
const TargetRegisterClass *RC = nullptr);
|
||||||
|
// Create an undef register of the given register class.
|
||||||
|
Register undef(const TargetRegisterClass *RC);
|
||||||
|
|
||||||
|
public:
|
||||||
|
KernelRewriter(MachineLoop &L, ModuloSchedule &S,
|
||||||
|
LiveIntervals *LIS = nullptr);
|
||||||
|
void rewrite();
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
|
||||||
|
LiveIntervals *LIS)
|
||||||
|
: S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()),
|
||||||
|
ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
|
||||||
|
TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
|
||||||
|
PreheaderBB = *BB->pred_begin();
|
||||||
|
if (PreheaderBB == BB)
|
||||||
|
PreheaderBB = *std::next(BB->pred_begin());
|
||||||
|
}
|
||||||
|
|
||||||
|
void KernelRewriter::rewrite() {
|
||||||
|
// Rearrange the loop to be in schedule order. Note that the schedule may
|
||||||
|
// contain instructions that are not owned by the loop block (InstrChanges and
|
||||||
|
// friends), so we gracefully handle unowned instructions and delete any
|
||||||
|
// instructions that weren't in the schedule.
|
||||||
|
auto InsertPt = BB->getFirstTerminator();
|
||||||
|
MachineInstr *FirstMI = nullptr;
|
||||||
|
for (MachineInstr *MI : S.getInstructions()) {
|
||||||
|
if (MI->isPHI())
|
||||||
|
continue;
|
||||||
|
if (MI->getParent())
|
||||||
|
MI->removeFromParent();
|
||||||
|
BB->insert(InsertPt, MI);
|
||||||
|
if (!FirstMI)
|
||||||
|
FirstMI = MI;
|
||||||
|
}
|
||||||
|
|
||||||
|
// At this point all of the scheduled instructions are between FirstMI
|
||||||
|
// and the end of the block. Kill from the first non-phi to FirstMI.
|
||||||
|
for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
|
||||||
|
if (LIS)
|
||||||
|
LIS->RemoveMachineInstrFromMaps(*I);
|
||||||
|
(I++)->eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now remap every instruction in the loop.
|
||||||
|
for (MachineInstr &MI : *BB) {
|
||||||
|
if (MI.isPHI())
|
||||||
|
continue;
|
||||||
|
for (MachineOperand &MO : MI.uses()) {
|
||||||
|
if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
|
||||||
|
continue;
|
||||||
|
Register Reg = remapUse(MO.getReg(), MI);
|
||||||
|
MO.setReg(Reg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EliminateDeadPhis(BB, MRI, LIS);
|
||||||
|
|
||||||
|
// Ensure a phi exists for all instructions that are either referenced by
|
||||||
|
// an illegal phi or by an instruction outside the loop. This allows us to
|
||||||
|
// treat remaps of these values the same as "normal" values that come from
|
||||||
|
// loop-carried phis.
|
||||||
|
for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
|
||||||
|
if (MI->isPHI()) {
|
||||||
|
Register R = MI->getOperand(0).getReg();
|
||||||
|
phi(R);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (MachineOperand &Def : MI->defs()) {
|
||||||
|
for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
|
||||||
|
if (MI.getParent() != BB) {
|
||||||
|
phi(Def.getReg());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
|
||||||
|
MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
|
||||||
|
if (!Producer)
|
||||||
|
return Reg;
|
||||||
|
|
||||||
|
int ConsumerStage = S.getStage(&MI);
|
||||||
|
if (!Producer->isPHI()) {
|
||||||
|
// Non-phi producers are simple to remap. Insert as many phis as the
|
||||||
|
// difference between the consumer and producer stages.
|
||||||
|
if (Producer->getParent() != BB)
|
||||||
|
// Producer was not inside the loop. Use the register as-is.
|
||||||
|
return Reg;
|
||||||
|
int ProducerStage = S.getStage(Producer);
|
||||||
|
assert(ConsumerStage != -1 &&
|
||||||
|
"In-loop consumer should always be scheduled!");
|
||||||
|
assert(ConsumerStage >= ProducerStage);
|
||||||
|
unsigned StageDiff = ConsumerStage - ProducerStage;
|
||||||
|
|
||||||
|
for (unsigned I = 0; I < StageDiff; ++I)
|
||||||
|
Reg = phi(Reg);
|
||||||
|
return Reg;
|
||||||
|
}
|
||||||
|
|
||||||
|
// First, dive through the phi chain to find the defaults for the generated
|
||||||
|
// phis.
|
||||||
|
SmallVector<Optional<Register>, 4> Defaults;
|
||||||
|
Register LoopReg = Reg;
|
||||||
|
auto LoopProducer = Producer;
|
||||||
|
while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
|
||||||
|
LoopReg = getLoopPhiReg(*LoopProducer, BB);
|
||||||
|
Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
|
||||||
|
LoopProducer = MRI.getUniqueVRegDef(LoopReg);
|
||||||
|
assert(LoopProducer);
|
||||||
|
}
|
||||||
|
int LoopProducerStage = S.getStage(LoopProducer);
|
||||||
|
int LoopProducerCycle = S.getCycle(LoopProducer);
|
||||||
|
int ConsumerCycle = S.getCycle(&MI);
|
||||||
|
|
||||||
|
Optional<Register> IllegalPhiDefault;
|
||||||
|
|
||||||
|
if (LoopProducerStage == -1) {
|
||||||
|
// Do nothing.
|
||||||
|
} else if (LoopProducerStage > ConsumerStage) {
|
||||||
|
// This schedule is only representable if ProducerStage == ConsumerStage+1.
|
||||||
|
// In addition, Consumer's cycle must be scheduled after Producer in the
|
||||||
|
// rescheduled loop.
|
||||||
|
assert(LoopProducerCycle <= ConsumerCycle);
|
||||||
|
assert(LoopProducerStage == ConsumerStage + 1);
|
||||||
|
// Peel off the first phi from Defaults and insert a phi between producer
|
||||||
|
// and consumer. This phi will not be at the front of the block so we
|
||||||
|
// consider it illegal. It will only exist during the rewrite process; it
|
||||||
|
// needs to exist while we peel off prologs because these could take the
|
||||||
|
// default value. After that we can replace all uses with the loop producer
|
||||||
|
// value.
|
||||||
|
IllegalPhiDefault = Defaults.front();
|
||||||
|
Defaults.erase(Defaults.begin());
|
||||||
|
} else {
|
||||||
|
assert(ConsumerStage >= LoopProducerStage);
|
||||||
|
int StageDiff = ConsumerStage - LoopProducerStage;
|
||||||
|
if (StageDiff > 0) {
|
||||||
|
LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
|
||||||
|
<< " to " << (Defaults.size() + StageDiff) << "\n");
|
||||||
|
// If we need more phis than we have defaults for, pad out with undefs for
|
||||||
|
// the earliest phis, which are at the end of the defaults chain (the
|
||||||
|
// chain is in reverse order).
|
||||||
|
Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
|
||||||
|
? Optional<Register>()
|
||||||
|
: Defaults.back());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we know the number of stages to jump back, insert the phi chain.
|
||||||
|
auto DefaultI = Defaults.rbegin();
|
||||||
|
while (DefaultI != Defaults.rend())
|
||||||
|
LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
|
||||||
|
|
||||||
|
if (IllegalPhiDefault.hasValue()) {
|
||||||
|
// The consumer optionally consumes LoopProducer in the same iteration
|
||||||
|
// (because the producer is scheduled at an earlier cycle than the consumer)
|
||||||
|
// or the initial value. To facilitate this we create an illegal block here
|
||||||
|
// by embedding a phi in the middle of the block. We will fix this up
|
||||||
|
// immediately prior to pruning.
|
||||||
|
auto RC = MRI.getRegClass(Reg);
|
||||||
|
Register R = MRI.createVirtualRegister(RC);
|
||||||
|
BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
|
||||||
|
.addReg(IllegalPhiDefault.getValue())
|
||||||
|
.addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
|
||||||
|
.addReg(LoopReg)
|
||||||
|
.addMBB(BB); // Block choice is arbitrary and has no effect.
|
||||||
|
return R;
|
||||||
|
}
|
||||||
|
|
||||||
|
return LoopReg;
|
||||||
|
}
|
||||||
|
|
||||||
|
Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
|
||||||
|
const TargetRegisterClass *RC) {
|
||||||
|
// If the init register is not undef, try and find an existing phi.
|
||||||
|
if (InitReg.hasValue()) {
|
||||||
|
auto I = Phis.find({LoopReg, InitReg.getValue()});
|
||||||
|
if (I != Phis.end())
|
||||||
|
return I->second;
|
||||||
|
} else {
|
||||||
|
for (auto &KV : Phis) {
|
||||||
|
if (KV.first.first == LoopReg)
|
||||||
|
return KV.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// InitReg is either undef or no existing phi takes InitReg as input. Try and
|
||||||
|
// find a phi that takes undef as input.
|
||||||
|
auto I = UndefPhis.find(LoopReg);
|
||||||
|
if (I != UndefPhis.end()) {
|
||||||
|
Register R = I->second;
|
||||||
|
if (!InitReg.hasValue())
|
||||||
|
// Found a phi taking undef as input, and this input is undef so return
|
||||||
|
// without any more changes.
|
||||||
|
return R;
|
||||||
|
// Found a phi taking undef as input, so rewrite it to take InitReg.
|
||||||
|
MachineInstr *MI = MRI.getVRegDef(R);
|
||||||
|
MI->getOperand(1).setReg(InitReg.getValue());
|
||||||
|
Phis.insert({{LoopReg, InitReg.getValue()}, R});
|
||||||
|
MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
|
||||||
|
UndefPhis.erase(I);
|
||||||
|
return R;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Failed to find any existing phi to reuse, so create a new one.
|
||||||
|
if (!RC)
|
||||||
|
RC = MRI.getRegClass(LoopReg);
|
||||||
|
Register R = MRI.createVirtualRegister(RC);
|
||||||
|
if (InitReg.hasValue())
|
||||||
|
MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
|
||||||
|
BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
|
||||||
|
.addReg(InitReg.hasValue() ? *InitReg : undef(RC))
|
||||||
|
.addMBB(PreheaderBB)
|
||||||
|
.addReg(LoopReg)
|
||||||
|
.addMBB(BB);
|
||||||
|
if (!InitReg.hasValue())
|
||||||
|
UndefPhis[LoopReg] = R;
|
||||||
|
else
|
||||||
|
Phis[{LoopReg, *InitReg}] = R;
|
||||||
|
return R;
|
||||||
|
}
|
||||||
|
|
||||||
|
Register KernelRewriter::undef(const TargetRegisterClass *RC) {
|
||||||
|
Register &R = Undefs[RC];
|
||||||
|
if (R == 0) {
|
||||||
|
// Create an IMPLICIT_DEF that defines this register if we need it.
|
||||||
|
// All uses of this should be removed by the time we have finished unrolling
|
||||||
|
// prologs and epilogs.
|
||||||
|
R = MRI.createVirtualRegister(RC);
|
||||||
|
auto *InsertBB = &PreheaderBB->getParent()->front();
|
||||||
|
BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
|
||||||
|
TII->get(TargetOpcode::IMPLICIT_DEF), R);
|
||||||
|
}
|
||||||
|
return R;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/// Describes an operand in the kernel of a pipelined loop. Characteristics of
|
||||||
|
/// the operand are discovered, such as how many in-loop PHIs it has to jump
|
||||||
|
/// through and defaults for these phis.
|
||||||
|
class KernelOperandInfo {
|
||||||
|
MachineBasicBlock *BB;
|
||||||
|
MachineRegisterInfo &MRI;
|
||||||
|
SmallVector<Register, 4> PhiDefaults;
|
||||||
|
MachineOperand *Source;
|
||||||
|
MachineOperand *Target;
|
||||||
|
|
||||||
|
public:
|
||||||
|
KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
|
||||||
|
const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
|
||||||
|
: MRI(MRI) {
|
||||||
|
Source = MO;
|
||||||
|
BB = MO->getParent()->getParent();
|
||||||
|
while (isRegInLoop(MO)) {
|
||||||
|
MachineInstr *MI = MRI.getVRegDef(MO->getReg());
|
||||||
|
if (MI->isFullCopy()) {
|
||||||
|
MO = &MI->getOperand(1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!MI->isPHI())
|
||||||
|
break;
|
||||||
|
// If this is an illegal phi, don't count it in distance.
|
||||||
|
if (IllegalPhis.count(MI)) {
|
||||||
|
MO = &MI->getOperand(3);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Register Default = getInitPhiReg(*MI, BB);
|
||||||
|
MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
|
||||||
|
: &MI->getOperand(3);
|
||||||
|
PhiDefaults.push_back(Default);
|
||||||
|
}
|
||||||
|
Target = MO;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const KernelOperandInfo &Other) const {
|
||||||
|
return PhiDefaults.size() == Other.PhiDefaults.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void print(raw_ostream &OS) const {
|
||||||
|
OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
|
||||||
|
<< *Source->getParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool isRegInLoop(MachineOperand *MO) {
|
||||||
|
return MO->isReg() && MO->getReg().isVirtual() &&
|
||||||
|
MRI.getVRegDef(MO->getReg())->getParent() == BB;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
|
||||||
|
BB = Schedule.getLoop()->getTopBlock();
|
||||||
|
Preheader = Schedule.getLoop()->getLoopPreheader();
|
||||||
|
|
||||||
|
// Dump the schedule before we invalidate and remap all its instructions.
|
||||||
|
// Stash it in a string so we can print it if we found an error.
|
||||||
|
std::string ScheduleDump;
|
||||||
|
raw_string_ostream OS(ScheduleDump);
|
||||||
|
Schedule.print(OS);
|
||||||
|
OS.flush();
|
||||||
|
|
||||||
|
// First, run the normal ModuleScheduleExpander. We don't support any
|
||||||
|
// InstrChanges.
|
||||||
|
assert(LIS && "Requires LiveIntervals!");
|
||||||
|
ModuloScheduleExpander MSE(MF, Schedule, *LIS,
|
||||||
|
ModuloScheduleExpander::InstrChangesTy());
|
||||||
|
MSE.expand();
|
||||||
|
MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
|
||||||
|
if (!ExpandedKernel) {
|
||||||
|
// The expander optimized away the kernel. We can't do any useful checking.
|
||||||
|
MSE.cleanup();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Before running the KernelRewriter, re-add BB into the CFG.
|
||||||
|
Preheader->addSuccessor(BB);
|
||||||
|
|
||||||
|
// Now run the new expansion algorithm.
|
||||||
|
KernelRewriter KR(*Schedule.getLoop(), Schedule);
|
||||||
|
KR.rewrite();
|
||||||
|
|
||||||
|
// Collect all illegal phis that the new algorithm created. We'll give these
|
||||||
|
// to KernelOperandInfo.
|
||||||
|
SmallPtrSet<MachineInstr *, 4> IllegalPhis;
|
||||||
|
for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
|
||||||
|
if (NI->isPHI())
|
||||||
|
IllegalPhis.insert(&*NI);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Co-iterate across both kernels. We expect them to be identical apart from
|
||||||
|
// phis and full COPYs (we look through both).
|
||||||
|
SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
|
||||||
|
auto OI = ExpandedKernel->begin();
|
||||||
|
auto NI = BB->begin();
|
||||||
|
for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
|
||||||
|
while (OI->isPHI() || OI->isFullCopy())
|
||||||
|
++OI;
|
||||||
|
while (NI->isPHI() || NI->isFullCopy())
|
||||||
|
++NI;
|
||||||
|
assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
|
||||||
|
// Analyze every operand separately.
|
||||||
|
for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
|
||||||
|
OOpI != OI->operands_end(); ++OOpI, ++NOpI)
|
||||||
|
KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
|
||||||
|
KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Failed = false;
|
||||||
|
for (auto &OldAndNew : KOIs) {
|
||||||
|
if (OldAndNew.first == OldAndNew.second)
|
||||||
|
continue;
|
||||||
|
Failed = true;
|
||||||
|
errs() << "Modulo kernel validation error: [\n";
|
||||||
|
errs() << " [golden] ";
|
||||||
|
OldAndNew.first.print(errs());
|
||||||
|
errs() << " ";
|
||||||
|
OldAndNew.second.print(errs());
|
||||||
|
errs() << "]\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Failed) {
|
||||||
|
errs() << "Golden reference kernel:\n";
|
||||||
|
ExpandedKernel->dump();
|
||||||
|
errs() << "New kernel:\n";
|
||||||
|
BB->dump();
|
||||||
|
errs() << ScheduleDump;
|
||||||
|
report_fatal_error(
|
||||||
|
"Modulo kernel validation (-pipeliner-experimental-cg) failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup by removing BB from the CFG again as the original
|
||||||
|
// ModuloScheduleExpander intended.
|
||||||
|
Preheader->removeSuccessor(BB);
|
||||||
|
MSE.cleanup();
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// ModuloScheduleTestPass implementation
|
// ModuloScheduleTestPass implementation
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -1283,10 +1738,12 @@ void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle), std::move(Stage));
|
ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
|
||||||
|
std::move(Stage));
|
||||||
ModuloScheduleExpander MSE(
|
ModuloScheduleExpander MSE(
|
||||||
MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
|
MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
|
||||||
MSE.expand();
|
MSE.expand();
|
||||||
|
MSE.cleanup();
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
Loading…
Reference in New Issue