[AMDGPU] Iterative scheduling infrastructure + minimal registry scheduler
Differential revision: https://reviews.llvm.org/D31046
llvm-svn: 298368
parent 044e003203
commit fd4c410f4d
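The new strategies hook into the generic MachineScheduler through MachineSchedRegistry, so they can be selected with the existing -misched option; the RUN lines in the tests added by this commit exercise them exactly this way. A minimal usage sketch (input.ll stands in for any AMDGPU IR module):

    llc -march=amdgcn -mcpu=tonga -misched=gcn-minreg -verify-machineinstrs < input.ll
    llc -march=amdgcn -mcpu=tonga -misched=gcn-max-occupancy-experimental -verify-machineinstrs < input.ll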
@@ -22,6 +22,7 @@
#include "SIInstrInfo.h"
#include "SIISelLowering.h"
#include "SIFrameLowering.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
@@ -317,6 +318,11 @@ public:
  /// the given LDS memory size is the only constraint.
  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;

  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
    const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
    return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction());
  }

  bool hasFP16Denormals() const {
    return FP64FP16Denormals;
  }
@@ -24,6 +24,7 @@
#endif
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
#include "SIMachineScheduler.h"
@@ -155,6 +156,20 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  return DAG;
}

static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  auto DAG = new GCNIterativeScheduler(C,
    GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
  return new GCNIterativeScheduler(C,
    GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
}

static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);
@@ -168,6 +183,16 @@ GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                             "Run GCN scheduler to maximize occupancy",
                             createGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry
IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
  "Run GCN scheduler to maximize occupancy (experimental)",
  createIterativeGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry
GCNMinRegSchedRegistry("gcn-minreg",
  "Run GCN iterative scheduler for minimal register usage (experimental)",
  createMinRegScheduler);

static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
@@ -94,6 +94,9 @@ add_llvm_target(AMDGPUCodeGen
  SIShrinkInstructions.cpp
  SITypeRewriter.cpp
  SIWholeQuadMode.cpp
  GCNIterativeScheduler.cpp
  GCNMinRegStrategy.cpp
  GCNRegPressure.cpp
  ${GLOBAL_ISEL_BUILD_FILES}
  )
@@ -0,0 +1,528 @@
|
|||
//===--------------------- GCNIterativeScheduler.cpp - --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "GCNIterativeScheduler.h"
|
||||
#include "GCNSchedStrategy.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "misched"
|
||||
|
||||
namespace llvm {
|
||||
std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
|
||||
const ScheduleDAG &DAG);
|
||||
}
|
||||
|
||||
// shim accessors for different order containers
|
||||
static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
|
||||
return MI;
|
||||
}
|
||||
static inline MachineInstr *getMachineInstr(const SUnit *SU) {
|
||||
return SU->getInstr();
|
||||
}
|
||||
static inline MachineInstr *getMachineInstr(const SUnit &SU) {
|
||||
return SU.getInstr();
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
LLVM_DUMP_METHOD
|
||||
static void printRegion(raw_ostream &OS,
|
||||
MachineBasicBlock::iterator Begin,
|
||||
MachineBasicBlock::iterator End,
|
||||
const LiveIntervals *LIS,
|
||||
unsigned MaxInstNum =
|
||||
std::numeric_limits<unsigned>::max()) {
|
||||
auto BB = Begin->getParent();
|
||||
OS << BB->getParent()->getName() << ":BB#" << BB->getNumber()
|
||||
<< ' ' << BB->getName() << ":\n";
|
||||
auto I = Begin;
|
||||
MaxInstNum = std::max(MaxInstNum, 1u);
|
||||
for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
|
||||
if (!I->isDebugValue() && LIS)
|
||||
OS << LIS->getInstructionIndex(*I);
|
||||
OS << '\t' << *I;
|
||||
}
|
||||
if (I != End) {
|
||||
OS << "\t...\n";
|
||||
I = std::prev(End);
|
||||
if (!I->isDebugValue() && LIS)
|
||||
OS << LIS->getInstructionIndex(*I);
|
||||
OS << '\t' << *I;
|
||||
}
|
||||
if (End != BB->end()) { // print boundary inst if present
|
||||
OS << "----\n";
|
||||
if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
|
||||
OS << *End;
|
||||
}
|
||||
}
|
||||
|
||||
LLVM_DUMP_METHOD
|
||||
static void printLivenessInfo(raw_ostream &OS,
|
||||
MachineBasicBlock::iterator Begin,
|
||||
MachineBasicBlock::iterator End,
|
||||
const LiveIntervals *LIS) {
|
||||
const auto BB = Begin->getParent();
|
||||
const auto &MRI = BB->getParent()->getRegInfo();
|
||||
|
||||
const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
|
||||
OS << "LIn RP: ";
|
||||
getRegPressure(MRI, LiveIns).print(OS);
|
||||
|
||||
const auto BottomMI = End == BB->end() ? std::prev(End) : End;
|
||||
const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
|
||||
OS << "LOt RP: ";
|
||||
getRegPressure(MRI, LiveOuts).print(OS);
|
||||
}
|
||||
|
||||
LLVM_DUMP_METHOD
|
||||
void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
for (const auto R : Regions) {
|
||||
OS << "Region to schedule ";
|
||||
printRegion(OS, R->Begin, R->End, LIS, 1);
|
||||
printLivenessInfo(OS, R->Begin, R->End, LIS);
|
||||
OS << "Max RP: ";
|
||||
R->MaxPressure.print(OS, &ST);
|
||||
}
|
||||
}
|
||||
|
||||
LLVM_DUMP_METHOD
|
||||
void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
|
||||
const Region *R,
|
||||
const GCNRegPressure &RP) const {
|
||||
OS << "\nAfter scheduling ";
|
||||
printRegion(OS, R->Begin, R->End, LIS);
|
||||
printSchedRP(OS, R->MaxPressure, RP);
|
||||
OS << '\n';
|
||||
}
|
||||
|
||||
LLVM_DUMP_METHOD
|
||||
void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
|
||||
const GCNRegPressure &Before,
|
||||
const GCNRegPressure &After) const {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
OS << "RP before: ";
|
||||
Before.print(OS, &ST);
|
||||
OS << "RP after: ";
|
||||
After.print(OS, &ST);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// DAG builder helper
|
||||
class GCNIterativeScheduler::BuildDAG {
|
||||
GCNIterativeScheduler &Sch;
|
||||
SmallVector<SUnit*, 8> TopRoots;
|
||||
public:
|
||||
BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
|
||||
: Sch(_Sch) {
|
||||
auto BB = R.Begin->getParent();
|
||||
Sch.BaseClass::startBlock(BB);
|
||||
Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
|
||||
|
||||
Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
|
||||
/*TrackLaneMask*/true);
|
||||
Sch.Topo.InitDAGTopologicalSorting();
|
||||
|
||||
SmallVector<SUnit*, 8> BotRoots;
|
||||
Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
|
||||
}
|
||||
~BuildDAG() {
|
||||
Sch.BaseClass::exitRegion();
|
||||
Sch.BaseClass::finishBlock();
|
||||
}
|
||||
ArrayRef<const SUnit*> getTopRoots() const {
|
||||
return TopRoots;
|
||||
}
|
||||
};
|
||||
|
||||
class GCNIterativeScheduler::OverrideLegacyStrategy {
|
||||
GCNIterativeScheduler &Sch;
|
||||
Region &Rgn;
|
||||
std::unique_ptr<MachineSchedStrategy> SaveSchedImpl;
|
||||
GCNRegPressure SaveMaxRP;
|
||||
public:
|
||||
OverrideLegacyStrategy(Region &R,
|
||||
MachineSchedStrategy &OverrideStrategy,
|
||||
GCNIterativeScheduler &_Sch)
|
||||
: Sch(_Sch)
|
||||
, Rgn(R)
|
||||
, SaveSchedImpl(std::move(_Sch.SchedImpl))
|
||||
, SaveMaxRP(R.MaxPressure) {
|
||||
Sch.SchedImpl.reset(&OverrideStrategy);
|
||||
auto BB = R.Begin->getParent();
|
||||
Sch.BaseClass::startBlock(BB);
|
||||
Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
|
||||
}
|
||||
~OverrideLegacyStrategy() {
|
||||
Sch.BaseClass::exitRegion();
|
||||
Sch.BaseClass::finishBlock();
|
||||
Sch.SchedImpl.release();
|
||||
Sch.SchedImpl = std::move(SaveSchedImpl);
|
||||
}
|
||||
void schedule() {
|
||||
assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
|
||||
DEBUG(dbgs() << "\nScheduling ";
|
||||
printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
|
||||
Sch.BaseClass::schedule();
|
||||
|
||||
// Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
|
||||
Sch.RegionEnd = Rgn.End;
|
||||
//assert(Rgn.End == Sch.RegionEnd);
|
||||
Rgn.Begin = Sch.RegionBegin;
|
||||
Rgn.MaxPressure.clear();
|
||||
}
|
||||
void restoreOrder() {
|
||||
assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
|
||||
// DAG SUnits are stored using original region's order
|
||||
// so just use SUnits as the restoring schedule
|
||||
Sch.scheduleRegion(Rgn, Sch.SUnits, SaveMaxRP);
|
||||
}
|
||||
};
|
||||
|
||||
// just a stub to make base class happy
|
||||
class SchedStrategyStub : public MachineSchedStrategy {
|
||||
public:
|
||||
bool shouldTrackPressure() const override { return false; }
|
||||
bool shouldTrackLaneMasks() const override { return false; }
|
||||
void initialize(ScheduleDAGMI *DAG) override {}
|
||||
SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
|
||||
void schedNode(SUnit *SU, bool IsTopNode) override {}
|
||||
void releaseTopNode(SUnit *SU) override {}
|
||||
void releaseBottomNode(SUnit *SU) override {}
|
||||
};
|
||||
|
||||
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
|
||||
StrategyKind S)
|
||||
: BaseClass(C, llvm::make_unique<SchedStrategyStub>())
|
||||
, Context(C)
|
||||
, Strategy(S)
|
||||
, UPTracker(*LIS) {
|
||||
}
|
||||
|
||||
// returns max pressure for a region
|
||||
GCNRegPressure
|
||||
GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
|
||||
MachineBasicBlock::iterator End)
|
||||
const {
|
||||
// For the purpose of pressure tracking bottom inst of the region should
|
||||
// be also processed. End is either BB end, BB terminator inst or sched
|
||||
// boundary inst.
|
||||
auto const BBEnd = Begin->getParent()->end();
|
||||
auto const BottomMI = End == BBEnd ? std::prev(End) : End;
|
||||
|
||||
// scheduleRegions walks bottom to top, so it's likely we just get the next
|
||||
// instruction to track
|
||||
auto AfterBottomMI = std::next(BottomMI);
|
||||
if (AfterBottomMI == BBEnd ||
|
||||
&*AfterBottomMI != UPTracker.getLastTrackedMI()) {
|
||||
UPTracker.reset(*BottomMI);
|
||||
} else {
|
||||
assert(UPTracker.isValid());
|
||||
}
|
||||
|
||||
for (auto I = BottomMI; I != Begin; --I)
|
||||
UPTracker.recede(*I);
|
||||
|
||||
UPTracker.recede(*Begin);
|
||||
|
||||
assert(UPTracker.isValid() ||
|
||||
(dbgs() << "Tracked region ",
|
||||
printRegion(dbgs(), Begin, End, LIS), false));
|
||||
return UPTracker.moveMaxPressure();
|
||||
}
|
||||
|
||||
// returns max pressure for a tentative schedule
|
||||
template <typename Range> GCNRegPressure
|
||||
GCNIterativeScheduler::getSchedulePressure(const Region &R,
|
||||
Range &&Schedule) const {
|
||||
auto const BBEnd = R.Begin->getParent()->end();
|
||||
GCNUpwardRPTracker RPTracker(*LIS);
|
||||
if (R.End != BBEnd) {
|
||||
// R.End points to the boundary instruction but the
|
||||
// schedule doesn't include it
|
||||
RPTracker.reset(*R.End);
|
||||
RPTracker.recede(*R.End);
|
||||
} else {
|
||||
// R.End doesn't point to the boundary instruction
|
||||
RPTracker.reset(*std::prev(BBEnd));
|
||||
}
|
||||
for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
|
||||
RPTracker.recede(*getMachineInstr(*--I));
|
||||
}
|
||||
return RPTracker.moveMaxPressure();
|
||||
}
|
||||
|
||||
void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
|
||||
MachineBasicBlock::iterator Begin,
|
||||
MachineBasicBlock::iterator End,
|
||||
unsigned NumRegionInstrs) {
|
||||
BaseClass::enterRegion(BB, Begin, End, NumRegionInstrs);
|
||||
if (NumRegionInstrs > 2) {
|
||||
Regions.push_back(
|
||||
new (Alloc.Allocate())
|
||||
Region { Begin, End, NumRegionInstrs,
|
||||
getRegionPressure(Begin, End), nullptr });
|
||||
}
|
||||
}
|
||||
|
||||
void GCNIterativeScheduler::schedule() { // overriden
|
||||
// do nothing
|
||||
DEBUG(
|
||||
printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
|
||||
if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
|
||||
dbgs() << "Max RP: ";
|
||||
Regions.back()->MaxPressure.print(dbgs(), &MF.getSubtarget<SISubtarget>());
|
||||
}
|
||||
dbgs() << '\n';
|
||||
);
|
||||
}
|
||||
|
||||
void GCNIterativeScheduler::finalizeSchedule() { // overriden
|
||||
if (Regions.empty())
|
||||
return;
|
||||
switch (Strategy) {
|
||||
case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
|
||||
case SCHEDULE_MINREGFORCED: scheduleMinReg(true); break;
|
||||
case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
|
||||
}
|
||||
}
|
||||
|
||||
// Detach schedule from SUnits and interleave it with debug values.
|
||||
// Returned schedule becomes independent of DAG state.
|
||||
std::vector<MachineInstr*>
|
||||
GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
|
||||
std::vector<MachineInstr*> Res;
|
||||
Res.reserve(Schedule.size() * 2);
|
||||
|
||||
if (FirstDbgValue)
|
||||
Res.push_back(FirstDbgValue);
|
||||
|
||||
const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
|
||||
for (auto SU : Schedule) {
|
||||
Res.push_back(SU->getInstr());
|
||||
const auto &D = std::find_if(DbgB, DbgE, [SU](decltype(*DbgB) &P) {
|
||||
return P.second == SU->getInstr();
|
||||
});
|
||||
if (D != DbgE)
|
||||
Res.push_back(D->first);
|
||||
}
|
||||
return Res;
|
||||
}
|
||||
|
||||
void GCNIterativeScheduler::setBestSchedule(Region &R,
|
||||
ScheduleRef Schedule,
|
||||
const GCNRegPressure &MaxRP) {
|
||||
R.BestSchedule.reset(
|
||||
new TentativeSchedule{ detachSchedule(Schedule), MaxRP });
|
||||
}
|
||||
|
||||
void GCNIterativeScheduler::scheduleBest(Region &R) {
|
||||
assert(R.BestSchedule.get() && "No schedule specified");
|
||||
scheduleRegion(R, R.BestSchedule->Schedule, R.BestSchedule->MaxPressure);
|
||||
R.BestSchedule.reset();
|
||||
}
|
||||
|
||||
// minimal required region scheduler, works for ranges of SUnits*,
|
||||
// SUnits or MachineIntrs*
|
||||
template <typename Range>
|
||||
void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
|
||||
const GCNRegPressure &MaxRP) {
|
||||
assert(RegionBegin == R.Begin && RegionEnd == R.End);
|
||||
assert(LIS != nullptr);
|
||||
#ifndef NDEBUG
|
||||
const auto SchedMaxRP = getSchedulePressure(R, Schedule);
|
||||
#endif
|
||||
auto BB = R.Begin->getParent();
|
||||
auto Top = R.Begin;
|
||||
for (const auto &I : Schedule) {
|
||||
auto MI = getMachineInstr(I);
|
||||
if (MI != &*Top) {
|
||||
BB->remove(MI);
|
||||
BB->insert(Top, MI);
|
||||
if (!MI->isDebugValue())
|
||||
LIS->handleMove(*MI, true);
|
||||
}
|
||||
if (!MI->isDebugValue()) {
|
||||
// Reset read - undef flags and update them later.
|
||||
for (auto &Op : MI->operands())
|
||||
if (Op.isReg() && Op.isDef())
|
||||
Op.setIsUndef(false);
|
||||
|
||||
RegisterOperands RegOpers;
|
||||
RegOpers.collect(*MI, *TRI, MRI, /*ShouldTrackLaneMasks*/true,
|
||||
/*IgnoreDead*/false);
|
||||
// Adjust liveness and add missing dead+read-undef flags.
|
||||
auto SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
|
||||
RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
|
||||
}
|
||||
Top = std::next(MI->getIterator());
|
||||
}
|
||||
RegionBegin = getMachineInstr(Schedule.front());
|
||||
|
||||
// Schedule consisting of MachineInstr* is considered 'detached'
|
||||
// and already interleaved with debug values
|
||||
if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) {
|
||||
placeDebugValues();
|
||||
// Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
|
||||
//assert(R.End == RegionEnd);
|
||||
RegionEnd = R.End;
|
||||
}
|
||||
|
||||
R.Begin = RegionBegin;
|
||||
R.MaxPressure = MaxRP;
|
||||
|
||||
#ifndef NDEBUG
|
||||
const auto RegionMaxRP = getRegionPressure(R);
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
#endif
|
||||
assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
|
||||
|| (dbgs() << "Max RP mismatch!!!\n"
|
||||
"RP for schedule (calculated): ",
|
||||
SchedMaxRP.print(dbgs(), &ST),
|
||||
dbgs() << "RP for schedule (reported): ",
|
||||
MaxRP.print(dbgs(), &ST),
|
||||
dbgs() << "RP after scheduling: ",
|
||||
RegionMaxRP.print(dbgs(), &ST),
|
||||
false));
|
||||
}
|
||||
|
||||
// Sort recorded regions by pressure - highest at the front
|
||||
void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
std::sort(Regions.begin(), Regions.end(),
|
||||
[&ST, TargetOcc](const Region *R1, const Region *R2) {
|
||||
return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
|
||||
});
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Legacy MaxOccupancy Strategy
|
||||
|
||||
// Tries to increase occupancy applying minreg scheduler for a sequence of
|
||||
// most demanding regions. Obtained schedules are saved as BestSchedule for a
|
||||
// region.
|
||||
// TargetOcc is the best achievable occupancy for a kernel.
|
||||
// Returns better occupancy on success or current occupancy on fail.
|
||||
// BestSchedules aren't deleted on fail.
|
||||
unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
|
||||
// TODO: assert Regions are sorted descending by pressure
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
|
||||
DEBUG(dbgs() << "Trying to to improve occupancy, target = " << TargetOcc
|
||||
<< ", current = " << Occ << '\n');
|
||||
|
||||
auto NewOcc = TargetOcc;
|
||||
for (auto R : Regions) {
|
||||
if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
|
||||
break;
|
||||
|
||||
DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
|
||||
printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
|
||||
|
||||
BuildDAG DAG(*R, *this);
|
||||
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
|
||||
const auto MaxRP = getSchedulePressure(*R, MinSchedule);
|
||||
DEBUG(dbgs() << "Occupancy improvement attempt:\n";
|
||||
printSchedRP(dbgs(), R->MaxPressure, MaxRP));
|
||||
|
||||
NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
|
||||
if (NewOcc <= Occ)
|
||||
break;
|
||||
|
||||
setBestSchedule(*R, MinSchedule, MaxRP);
|
||||
}
|
||||
DEBUG(dbgs() << "New occupancy = " << NewOcc
|
||||
<< ", prev occupancy = " << Occ << '\n');
|
||||
return std::max(NewOcc, Occ);
|
||||
}
|
||||
|
||||
void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
|
||||
bool TryMaximizeOccupancy) {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
|
||||
|
||||
sortRegionsByPressure(TgtOcc);
|
||||
auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
|
||||
|
||||
if (TryMaximizeOccupancy && Occ < TgtOcc)
|
||||
Occ = tryMaximizeOccupancy(TgtOcc);
|
||||
|
||||
// This is really weird but for some magic scheduling regions twice
|
||||
// gives performance improvement
|
||||
const int NumPasses = Occ < TgtOcc ? 2 : 1;
|
||||
|
||||
TgtOcc = std::min(Occ, TgtOcc);
|
||||
DEBUG(dbgs() << "Scheduling using default scheduler, "
|
||||
"target occupancy = " << TgtOcc << '\n');
|
||||
GCNMaxOccupancySchedStrategy LStrgy(Context);
|
||||
|
||||
for (int I = 0; I < NumPasses; ++I) {
|
||||
// running first pass with TargetOccupancy = 0 mimics previous scheduling
|
||||
// approach and is a performance magic
|
||||
LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
|
||||
for (auto R : Regions) {
|
||||
OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
|
||||
|
||||
Ovr.schedule();
|
||||
const auto RP = getRegionPressure(*R);
|
||||
DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
|
||||
|
||||
if (RP.getOccupancy(ST) < TgtOcc) {
|
||||
DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
|
||||
if (R->BestSchedule.get() &&
|
||||
R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
|
||||
DEBUG(dbgs() << ", scheduling minimal register\n");
|
||||
scheduleBest(*R);
|
||||
} else {
|
||||
DEBUG(dbgs() << ", restoring\n");
|
||||
Ovr.restoreOrder();
|
||||
assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Minimal Register Strategy
|
||||
|
||||
void GCNIterativeScheduler::scheduleMinReg(bool force) {
|
||||
const auto &ST = MF.getSubtarget<SISubtarget>();
|
||||
const auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
|
||||
sortRegionsByPressure(TgtOcc);
|
||||
|
||||
auto MaxPressure = Regions.front()->MaxPressure;
|
||||
for (auto R : Regions) {
|
||||
if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
|
||||
break;
|
||||
|
||||
BuildDAG DAG(*R, *this);
|
||||
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
|
||||
|
||||
const auto RP = getSchedulePressure(*R, MinSchedule);
|
||||
DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
|
||||
dbgs() << "\nWarning: Pressure becomes worse after minreg!";
|
||||
printSchedRP(dbgs(), R->MaxPressure, RP);
|
||||
});
|
||||
|
||||
if (!force && MaxPressure.less(ST, RP, TgtOcc))
|
||||
break;
|
||||
|
||||
scheduleRegion(*R, MinSchedule, RP);
|
||||
DEBUG(printSchedResult(dbgs(), R, RP));
|
||||
|
||||
MaxPressure = RP;
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,118 @@
|
|||
//===--------- GCNIterativeScheduler.h - GCN Scheduler -*- C++ -*----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
|
||||
|
||||
#include "GCNRegPressure.h"
|
||||
|
||||
#include "llvm/CodeGen/MachineScheduler.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class GCNIterativeScheduler : public ScheduleDAGMILive {
|
||||
typedef ScheduleDAGMILive BaseClass;
|
||||
public:
|
||||
enum StrategyKind {
|
||||
SCHEDULE_MINREGONLY,
|
||||
SCHEDULE_MINREGFORCED,
|
||||
SCHEDULE_LEGACYMAXOCCUPANCY
|
||||
};
|
||||
|
||||
GCNIterativeScheduler(MachineSchedContext *C,
|
||||
StrategyKind S);
|
||||
|
||||
void schedule() override;
|
||||
|
||||
void enterRegion(MachineBasicBlock *BB,
|
||||
MachineBasicBlock::iterator Begin,
|
||||
MachineBasicBlock::iterator End,
|
||||
unsigned RegionInstrs) override;
|
||||
|
||||
void finalizeSchedule() override;
|
||||
|
||||
protected:
|
||||
|
||||
typedef ArrayRef<const SUnit*> ScheduleRef;
|
||||
|
||||
struct TentativeSchedule {
|
||||
std::vector<MachineInstr*> Schedule;
|
||||
GCNRegPressure MaxPressure;
|
||||
};
|
||||
|
||||
struct Region {
|
||||
// Fields except for BestSchedule are supposed to reflect current IR state
|
||||
// `const` fields are to emphasize they shouldn't change for any schedule.
|
||||
MachineBasicBlock::iterator Begin;
|
||||
// End is either a boundary instruction or end of basic block
|
||||
const MachineBasicBlock::iterator End;
|
||||
const unsigned NumRegionInstrs;
|
||||
GCNRegPressure MaxPressure;
|
||||
|
||||
// best schedule for the region so far (not scheduled yet)
|
||||
std::unique_ptr<TentativeSchedule> BestSchedule;
|
||||
};
|
||||
|
||||
SpecificBumpPtrAllocator<Region> Alloc;
|
||||
std::vector<Region*> Regions;
|
||||
|
||||
MachineSchedContext *Context;
|
||||
const StrategyKind Strategy;
|
||||
mutable GCNUpwardRPTracker UPTracker;
|
||||
|
||||
class BuildDAG;
|
||||
class OverrideLegacyStrategy;
|
||||
|
||||
template <typename Range>
|
||||
GCNRegPressure getSchedulePressure(const Region &R,
|
||||
Range &&Schedule) const;
|
||||
|
||||
GCNRegPressure getRegionPressure(MachineBasicBlock::iterator Begin,
|
||||
MachineBasicBlock::iterator End) const;
|
||||
|
||||
GCNRegPressure getRegionPressure(const Region &R) const {
|
||||
return getRegionPressure(R.Begin, R.End);
|
||||
}
|
||||
|
||||
void setBestSchedule(Region &R,
|
||||
ScheduleRef Schedule,
|
||||
const GCNRegPressure &MaxRP = GCNRegPressure());
|
||||
|
||||
void scheduleBest(Region &R);
|
||||
|
||||
std::vector<MachineInstr*> detachSchedule(ScheduleRef Schedule) const;
|
||||
|
||||
void sortRegionsByPressure(unsigned TargetOcc);
|
||||
|
||||
template <typename Range>
|
||||
void scheduleRegion(Region &R, Range &&Schedule,
|
||||
const GCNRegPressure &MaxRP = GCNRegPressure());
|
||||
|
||||
unsigned tryMaximizeOccupancy(unsigned TargetOcc =
|
||||
std::numeric_limits<unsigned>::max());
|
||||
|
||||
void scheduleLegacyMaxOccupancy(bool TryMaximizeOccupancy = true);
|
||||
void scheduleMinReg(bool force = false);
|
||||
|
||||
void printRegions(raw_ostream &OS) const;
|
||||
void printSchedResult(raw_ostream &OS,
|
||||
const Region *R,
|
||||
const GCNRegPressure &RP) const;
|
||||
void printSchedRP(raw_ostream &OS,
|
||||
const GCNRegPressure &Before,
|
||||
const GCNRegPressure &After) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
|
|
@@ -0,0 +1,266 @@
|
|||
//===----------------------- GCNMinRegStrategy.cpp - ----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "misched"
|
||||
|
||||
class GCNMinRegScheduler {
|
||||
struct Candidate : ilist_node<Candidate> {
|
||||
const SUnit *SU;
|
||||
int Priority;
|
||||
|
||||
Candidate(const SUnit *SU_, int Priority_ = 0)
|
||||
: SU(SU_), Priority(Priority_) {}
|
||||
};
|
||||
|
||||
SpecificBumpPtrAllocator<Candidate> Alloc;
|
||||
typedef simple_ilist<Candidate> Queue;
|
||||
Queue RQ; // Ready queue
|
||||
|
||||
std::vector<unsigned> NumPreds;
|
||||
|
||||
bool isScheduled(const SUnit *SU) const {
|
||||
assert(!SU->isBoundaryNode());
|
||||
return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max();
|
||||
}
|
||||
|
||||
void setIsScheduled(const SUnit *SU) {
|
||||
assert(!SU->isBoundaryNode());
|
||||
NumPreds[SU->NodeNum] = std::numeric_limits<unsigned>::max();
|
||||
}
|
||||
|
||||
unsigned getNumPreds(const SUnit *SU) const {
|
||||
assert(!SU->isBoundaryNode());
|
||||
assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
|
||||
return NumPreds[SU->NodeNum];
|
||||
}
|
||||
|
||||
unsigned decNumPreds(const SUnit *SU) {
|
||||
assert(!SU->isBoundaryNode());
|
||||
assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
|
||||
return --NumPreds[SU->NodeNum];
|
||||
}
|
||||
|
||||
void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits);
|
||||
|
||||
int getReadySuccessors(const SUnit *SU) const;
|
||||
int getNotReadySuccessors(const SUnit *SU) const;
|
||||
|
||||
template <typename Calc>
|
||||
unsigned findMax(unsigned Num, Calc C);
|
||||
|
||||
Candidate* pickCandidate();
|
||||
|
||||
void bumpPredsPriority(const SUnit *SchedSU, int Priority);
|
||||
void releaseSuccessors(const SUnit* SU, int Priority);
|
||||
|
||||
public:
|
||||
std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
|
||||
const ScheduleDAG &DAG);
|
||||
};
|
||||
|
||||
void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
|
||||
NumPreds.resize(SUnits.size());
|
||||
for (unsigned I = 0; I < SUnits.size(); ++I)
|
||||
NumPreds[I] = SUnits[I].NumPredsLeft;
|
||||
}
|
||||
|
||||
int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const {
|
||||
unsigned NumSchedSuccs = 0;
|
||||
for (auto SDep : SU->Succs) {
|
||||
bool wouldBeScheduled = true;
|
||||
for (auto PDep : SDep.getSUnit()->Preds) {
|
||||
auto PSU = PDep.getSUnit();
|
||||
assert(!PSU->isBoundaryNode());
|
||||
if (PSU != SU && !isScheduled(PSU)) {
|
||||
wouldBeScheduled = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
NumSchedSuccs += wouldBeScheduled ? 1 : 0;
|
||||
}
|
||||
return NumSchedSuccs;
|
||||
}
|
||||
|
||||
int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
|
||||
return SU->Succs.size() - getReadySuccessors(SU);
|
||||
}
|
||||
|
||||
template <typename Calc>
|
||||
unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
|
||||
assert(!RQ.empty() && Num <= RQ.size());
|
||||
typedef decltype(C(*RQ.begin())) T;
|
||||
T Max = std::numeric_limits<T>::min();
|
||||
unsigned NumMax = 0;
|
||||
for (auto I = RQ.begin(); Num; --Num) {
|
||||
T Cur = C(*I);
|
||||
if (Cur >= Max) {
|
||||
if (Cur > Max) {
|
||||
Max = Cur;
|
||||
NumMax = 1;
|
||||
} else
|
||||
++NumMax;
|
||||
auto &Cand = *I++;
|
||||
RQ.remove(Cand);
|
||||
RQ.push_front(Cand);
|
||||
continue;
|
||||
}
|
||||
++I;
|
||||
}
|
||||
return NumMax;
|
||||
}
|
||||
|
||||
GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
|
||||
do {
|
||||
unsigned Num = RQ.size();
|
||||
if (Num == 1) break;
|
||||
|
||||
DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num << '\n');
|
||||
Num = findMax(Num, [=](const Candidate &C) { return C.Priority; });
|
||||
if (Num == 1) break;
|
||||
|
||||
DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
|
||||
<< Num << '\n');
|
||||
Num = findMax(Num, [=](const Candidate &C) {
|
||||
auto SU = C.SU;
|
||||
int Res = getNotReadySuccessors(SU);
|
||||
DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready "
|
||||
<< Res << " successors, metric = " << -Res << '\n');
|
||||
return -Res;
|
||||
});
|
||||
if (Num == 1) break;
|
||||
|
||||
DEBUG(dbgs() << "\nSelecting most producing candidate among "
|
||||
<< Num << '\n');
|
||||
Num = findMax(Num, [=](const Candidate &C) {
|
||||
auto SU = C.SU;
|
||||
auto Res = getReadySuccessors(SU);
|
||||
DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready "
|
||||
<< Res << " successors, metric = " << Res << '\n');
|
||||
return Res;
|
||||
});
|
||||
if (Num == 1) break;
|
||||
|
||||
Num = Num ? Num : RQ.size();
|
||||
DEBUG(dbgs() << "\nCan't find best candidate, selecting in program order among "
|
||||
<< Num << '\n');
|
||||
Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; });
|
||||
assert(Num == 1);
|
||||
} while (false);
|
||||
|
||||
return &RQ.front();
|
||||
}
|
||||
|
||||
void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
|
||||
SmallPtrSet<const SUnit*, 32> Set;
|
||||
for (const auto &S : SchedSU->Succs) {
|
||||
if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
|
||||
S.getKind() != SDep::Data)
|
||||
continue;
|
||||
for (const auto &P : S.getSUnit()->Preds) {
|
||||
auto PSU = P.getSUnit();
|
||||
assert(!PSU->isBoundaryNode());
|
||||
if (PSU != SchedSU && !isScheduled(PSU)) {
|
||||
Set.insert(PSU);
|
||||
}
|
||||
}
|
||||
}
|
||||
SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end());
|
||||
while (!Worklist.empty()) {
|
||||
auto SU = Worklist.pop_back_val();
|
||||
assert(!SU->isBoundaryNode());
|
||||
for (const auto &P : SU->Preds) {
|
||||
if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) &&
|
||||
Set.insert(P.getSUnit()).second)
|
||||
Worklist.push_back(P.getSUnit());
|
||||
}
|
||||
}
|
||||
DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
|
||||
<< ")'s non-ready successors of " << Priority
|
||||
<< " priority in ready queue: ");
|
||||
const auto SetEnd = Set.end();
|
||||
for (auto &C : RQ) {
|
||||
if (Set.find(C.SU) != SetEnd) {
|
||||
C.Priority = Priority;
|
||||
DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
|
||||
}
|
||||
}
|
||||
DEBUG(dbgs() << '\n');
|
||||
}
|
||||
|
||||
void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) {
|
||||
for (const auto &S : SU->Succs) {
|
||||
auto SuccSU = S.getSUnit();
|
||||
if (S.isWeak())
|
||||
continue;
|
||||
assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
|
||||
if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
|
||||
RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const SUnit*>
|
||||
GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
|
||||
const ScheduleDAG &DAG) {
|
||||
const auto &SUnits = DAG.SUnits;
|
||||
std::vector<const SUnit*> Schedule;
|
||||
Schedule.reserve(SUnits.size());
|
||||
|
||||
initNumPreds(SUnits);
|
||||
|
||||
int StepNo = 0;
|
||||
|
||||
for (auto SU : TopRoots) {
|
||||
RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo));
|
||||
}
|
||||
releaseSuccessors(&DAG.EntrySU, StepNo);
|
||||
|
||||
while (!RQ.empty()) {
|
||||
DEBUG(
|
||||
dbgs() << "\n=== Picking candidate, Step = " << StepNo << "\n"
|
||||
"Ready queue:";
|
||||
for (auto &C : RQ)
|
||||
dbgs() << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
|
||||
dbgs() << '\n';
|
||||
);
|
||||
|
||||
auto C = pickCandidate();
|
||||
assert(C);
|
||||
RQ.remove(*C);
|
||||
auto SU = C->SU;
|
||||
DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
|
||||
|
||||
releaseSuccessors(SU, StepNo);
|
||||
Schedule.push_back(SU);
|
||||
setIsScheduled(SU);
|
||||
|
||||
if (getReadySuccessors(SU) == 0)
|
||||
bumpPredsPriority(SU, StepNo);
|
||||
|
||||
++StepNo;
|
||||
}
|
||||
assert(SUnits.size() == Schedule.size());
|
||||
|
||||
return Schedule;
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
|
||||
const ScheduleDAG &DAG) {
|
||||
GCNMinRegScheduler S;
|
||||
return S.schedule(TopRoots, DAG);
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,355 @@
|
|||
//===------------------------- GCNRegPressure.cpp - -----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "GCNRegPressure.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "misched"
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
LLVM_DUMP_METHOD
|
||||
void llvm::printLivesAt(SlotIndex SI,
|
||||
const LiveIntervals &LIS,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
dbgs() << "Live regs at " << SI << ": "
|
||||
<< *LIS.getInstructionFromIndex(SI);
|
||||
unsigned Num = 0;
|
||||
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
|
||||
const unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
|
||||
if (MRI.reg_nodbg_empty(Reg))
|
||||
continue;
|
||||
const auto &LI = LIS.getInterval(Reg);
|
||||
if (LI.hasSubRanges()) {
|
||||
bool firstTime = true;
|
||||
for (const auto &S : LI.subranges()) {
|
||||
if (!S.liveAt(SI)) continue;
|
||||
if (firstTime) {
|
||||
dbgs() << " " << PrintReg(Reg, MRI.getTargetRegisterInfo())
|
||||
<< '\n';
|
||||
firstTime = false;
|
||||
}
|
||||
dbgs() << " " << S << '\n';
|
||||
++Num;
|
||||
}
|
||||
} else if (LI.liveAt(SI)) {
|
||||
dbgs() << " " << LI << '\n';
|
||||
++Num;
|
||||
}
|
||||
}
|
||||
if (!Num) dbgs() << " <none>\n";
|
||||
}
|
||||
|
||||
static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
|
||||
const GCNRPTracker::LiveRegSet &S2) {
|
||||
if (S1.size() != S2.size())
|
||||
return false;
|
||||
|
||||
for (const auto &P : S1) {
|
||||
auto I = S2.find(P.first);
|
||||
if (I == S2.end() || I->second != P.second)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static GCNRPTracker::LiveRegSet
|
||||
stripEmpty(const GCNRPTracker::LiveRegSet &LR) {
|
||||
GCNRPTracker::LiveRegSet Res;
|
||||
for (const auto &P : LR) {
|
||||
if (P.second.any())
|
||||
Res.insert(P);
|
||||
}
|
||||
return Res;
|
||||
}
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// GCNRegPressure
|
||||
|
||||
unsigned GCNRegPressure::getRegKind(unsigned Reg,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
assert(TargetRegisterInfo::isVirtualRegister(Reg));
|
||||
const auto RC = MRI.getRegClass(Reg);
|
||||
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
|
||||
return STI->isSGPRClass(RC) ?
|
||||
(RC->getSize() == 4 ? SGPR32 : SGPR_TUPLE) :
|
||||
(RC->getSize() == 4 ? VGPR32 : VGPR_TUPLE);
|
||||
}
|
||||
|
||||
void GCNRegPressure::inc(unsigned Reg,
|
||||
LaneBitmask PrevMask,
|
||||
LaneBitmask NewMask,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
if (NewMask == PrevMask)
|
||||
return;
|
||||
|
||||
int Sign = 1;
|
||||
if (NewMask < PrevMask) {
|
||||
std::swap(NewMask, PrevMask);
|
||||
Sign = -1;
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
const auto MaxMask = MRI.getMaxLaneMaskForVReg(Reg);
|
||||
#endif
|
||||
switch (auto Kind = getRegKind(Reg, MRI)) {
|
||||
case SGPR32:
|
||||
case VGPR32:
|
||||
assert(PrevMask.none() && NewMask == MaxMask);
|
||||
Value[Kind] += Sign;
|
||||
break;
|
||||
|
||||
case SGPR_TUPLE:
|
||||
case VGPR_TUPLE:
|
||||
assert(NewMask < MaxMask || NewMask == MaxMask);
|
||||
assert(PrevMask < NewMask);
|
||||
|
||||
Value[Kind == SGPR_TUPLE ? SGPR32 : VGPR32] +=
|
||||
Sign * countPopulation((~PrevMask & NewMask).getAsInteger());
|
||||
|
||||
if (PrevMask.none()) {
|
||||
assert(NewMask.any());
|
||||
Value[Kind] += Sign * MRI.getPressureSets(Reg).getWeight();
|
||||
}
|
||||
break;
|
||||
|
||||
default: llvm_unreachable("Unknown register kind");
|
||||
}
|
||||
}
|
||||
|
||||
bool GCNRegPressure::less(const SISubtarget &ST,
|
||||
const GCNRegPressure& O,
|
||||
unsigned MaxOccupancy) const {
|
||||
const auto SGPROcc = std::min(MaxOccupancy,
|
||||
ST.getOccupancyWithNumSGPRs(getSGRPNum()));
|
||||
const auto VGPROcc = std::min(MaxOccupancy,
|
||||
ST.getOccupancyWithNumVGPRs(getVGRPNum()));
|
||||
const auto OtherSGPROcc = std::min(MaxOccupancy,
|
||||
ST.getOccupancyWithNumSGPRs(O.getSGRPNum()));
|
||||
const auto OtherVGPROcc = std::min(MaxOccupancy,
|
||||
ST.getOccupancyWithNumVGPRs(O.getVGRPNum()));
|
||||
|
||||
const auto Occ = std::min(SGPROcc, VGPROcc);
|
||||
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
|
||||
if (Occ != OtherOcc)
|
||||
return Occ > OtherOcc;
|
||||
|
||||
bool SGPRImportant = SGPROcc < VGPROcc;
|
||||
const bool OtherSGPRImportant = OtherSGPROcc < OtherVGPROcc;
|
||||
|
||||
// if both pressures disagree on what is more important compare vgprs
|
||||
if (SGPRImportant != OtherSGPRImportant) {
|
||||
SGPRImportant = false;
|
||||
}
|
||||
|
||||
// compare large regs pressure
|
||||
bool SGPRFirst = SGPRImportant;
|
||||
for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
|
||||
if (SGPRFirst) {
|
||||
auto SW = getSGPRTuplesWeight();
|
||||
auto OtherSW = O.getSGPRTuplesWeight();
|
||||
if (SW != OtherSW)
|
||||
return SW < OtherSW;
|
||||
} else {
|
||||
auto VW = getVGPRTuplesWeight();
|
||||
auto OtherVW = O.getVGPRTuplesWeight();
|
||||
if (VW != OtherVW)
|
||||
return VW < OtherVW;
|
||||
}
|
||||
}
|
||||
return SGPRImportant ? (getSGRPNum() < O.getSGRPNum()):
|
||||
(getVGRPNum() < O.getVGRPNum());
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
LLVM_DUMP_METHOD
|
||||
void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const {
|
||||
OS << "VGPRs: " << getVGRPNum();
|
||||
if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGRPNum()) << ')';
|
||||
OS << ", SGPRs: " << getSGRPNum();
|
||||
if (ST) OS << "(O" << ST->getOccupancyWithNumSGPRs(getSGRPNum()) << ')';
|
||||
OS << ", LVGPR WT: " << getVGPRTuplesWeight()
|
||||
<< ", LSGPR WT: " << getSGPRTuplesWeight();
|
||||
if (ST) OS << " -> Occ: " << getOccupancy(*ST);
|
||||
OS << '\n';
|
||||
}
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// GCNRPTracker
|
||||
|
||||
LaneBitmask llvm::getLiveLaneMask(unsigned Reg,
|
||||
SlotIndex SI,
|
||||
const LiveIntervals &LIS,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
assert(!MRI.reg_nodbg_empty(Reg));
|
||||
LaneBitmask LiveMask;
|
||||
const auto &LI = LIS.getInterval(Reg);
|
||||
if (LI.hasSubRanges()) {
|
||||
for (const auto &S : LI.subranges())
|
||||
if (S.liveAt(SI)) {
|
||||
LiveMask |= S.LaneMask;
|
||||
assert(LiveMask < MRI.getMaxLaneMaskForVReg(Reg) ||
|
||||
LiveMask == MRI.getMaxLaneMaskForVReg(Reg));
|
||||
}
|
||||
} else if (LI.liveAt(SI)) {
|
||||
LiveMask = MRI.getMaxLaneMaskForVReg(Reg);
|
||||
}
|
||||
return LiveMask;
|
||||
}
|
||||
|
||||
GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
|
||||
const LiveIntervals &LIS,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
GCNRPTracker::LiveRegSet LiveRegs;
|
||||
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
|
||||
auto Reg = TargetRegisterInfo::index2VirtReg(I);
|
||||
if (MRI.reg_nodbg_empty(Reg))
|
||||
continue;
|
||||
auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
|
||||
if (LiveMask.any())
|
||||
LiveRegs[Reg] = LiveMask;
|
||||
}
|
||||
return LiveRegs;
|
||||
}
|
||||
|
||||
void GCNUpwardRPTracker::reset(const MachineInstr &MI) {
|
||||
MRI = &MI.getParent()->getParent()->getRegInfo();
|
||||
LiveRegs = getLiveRegsAfter(MI, LIS);
|
||||
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
|
||||
}
|
||||
|
||||
LaneBitmask GCNUpwardRPTracker::getDefRegMask(const MachineOperand &MO) const {
|
||||
assert(MO.isDef() && MO.isReg() &&
|
||||
TargetRegisterInfo::isVirtualRegister(MO.getReg()));
|
||||
|
||||
// We don't rely on read-undef flag because in case of tentative schedule
|
||||
// tracking it isn't set correctly yet. This works correctly however since
|
||||
// use mask has been tracked before using LIS.
|
||||
return MO.getSubReg() == 0 ?
|
||||
MRI->getMaxLaneMaskForVReg(MO.getReg()) :
|
||||
MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg());
|
||||
}
|
||||
|
||||
LaneBitmask GCNUpwardRPTracker::getUsedRegMask(const MachineOperand &MO) const {
|
||||
assert(MO.isUse() && MO.isReg() &&
|
||||
TargetRegisterInfo::isVirtualRegister(MO.getReg()));
|
||||
|
||||
if (auto SubReg = MO.getSubReg())
|
||||
return MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
|
||||
|
||||
auto MaxMask = MRI->getMaxLaneMaskForVReg(MO.getReg());
|
||||
if (MaxMask.getAsInteger() == 1) // cannot have subregs
|
||||
return MaxMask;
|
||||
|
||||
// For a tentative schedule LIS isn't updated yet but livemask should remain
|
||||
// the same on any schedule. Subreg defs can be reordered but they all must
|
||||
// dominate uses anyway.
|
||||
auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
|
||||
return getLiveLaneMask(MO.getReg(), SI, LIS, *MRI);
|
||||
}
|
||||
|
||||
void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
|
||||
assert(MRI && "call reset first");
|
||||
|
||||
LastTrackedMI = &MI;
|
||||
|
||||
if (MI.isDebugValue())
|
||||
return;
|
||||
|
||||
// process all defs first to ensure early clobbers are handled correctly
|
||||
// iterating over operands() to catch implicit defs
|
||||
for (const auto &MO : MI.operands()) {
|
||||
if (!MO.isReg() || !MO.isDef() ||
|
||||
!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
|
||||
continue;
|
||||
|
||||
auto Reg = MO.getReg();
|
||||
auto &LiveMask = LiveRegs[Reg];
|
||||
auto PrevMask = LiveMask;
|
||||
LiveMask &= ~getDefRegMask(MO);
|
||||
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
|
||||
}
|
||||
|
||||
// then all uses
|
||||
for (const auto &MO : MI.uses()) {
|
||||
if (!MO.isReg() || !MO.readsReg() ||
|
||||
!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
|
||||
continue;
|
||||
|
||||
auto Reg = MO.getReg();
|
||||
auto &LiveMask = LiveRegs[Reg];
|
||||
auto PrevMask = LiveMask;
|
||||
LiveMask |= getUsedRegMask(MO);
|
||||
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
|
||||
}
|
||||
|
||||
MaxPressure = max(MaxPressure, CurPressure);
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
LLVM_DUMP_METHOD
|
||||
static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
|
||||
const GCNRPTracker::LiveRegSet &TrackedLR,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
for (auto const &P : TrackedLR) {
|
||||
auto I = LISLR.find(P.first);
|
||||
if (I == LISLR.end()) {
|
||||
dbgs() << " " << PrintReg(P.first, TRI)
|
||||
<< ":L" << PrintLaneMask(P.second)
|
||||
<< " isn't found in LIS reported set\n";
|
||||
}
|
||||
else if (I->second != P.second) {
|
||||
dbgs() << " " << PrintReg(P.first, TRI)
|
||||
<< " masks doesn't match: LIS reported "
|
||||
<< PrintLaneMask(I->second)
|
||||
<< ", tracked "
|
||||
<< PrintLaneMask(P.second)
|
||||
<< '\n';
|
||||
}
|
||||
}
|
||||
for (auto const &P : LISLR) {
|
||||
auto I = TrackedLR.find(P.first);
|
||||
if (I == TrackedLR.end()) {
|
||||
dbgs() << " " << PrintReg(P.first, TRI)
|
||||
<< ":L" << PrintLaneMask(P.second)
|
||||
<< " isn't found in tracked set\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool GCNUpwardRPTracker::isValid() const {
|
||||
const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
|
||||
const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
|
||||
const auto TrackedLR = stripEmpty(LiveRegs);
|
||||
|
||||
if (!isEqual(LISLR, TrackedLR)) {
|
||||
dbgs() << "\nGCNUpwardRPTracker error: Tracked and"
|
||||
" LIS reported livesets mismatch:\n";
|
||||
printLivesAt(SI, LIS, *MRI);
|
||||
reportMismatch(LISLR, TrackedLR, MRI->getTargetRegisterInfo());
|
||||
return false;
|
||||
}
|
||||
|
||||
auto LISPressure = getRegPressure(*MRI, LISLR);
|
||||
if (LISPressure != CurPressure) {
|
||||
dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: ";
|
||||
CurPressure.print(dbgs());
|
||||
dbgs() << "LIS rpt: ";
|
||||
LISPressure.print(dbgs());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
|
@@ -0,0 +1,170 @@
|
|||
//===---------------------- GCNRegPressure.h -*- C++ -*--------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
|
||||
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
struct GCNRegPressure {
|
||||
enum RegKind {
|
||||
SGPR32,
|
||||
SGPR_TUPLE,
|
||||
VGPR32,
|
||||
VGPR_TUPLE,
|
||||
TOTAL_KINDS
|
||||
};
|
||||
|
||||
GCNRegPressure() {
|
||||
clear();
|
||||
}
|
||||
|
||||
bool empty() const { return getSGRPNum() == 0 && getVGRPNum() == 0; }
|
||||
|
||||
void clear() { std::fill(&Value[0], &Value[TOTAL_KINDS], 0); }
|
||||
|
||||
unsigned getSGRPNum() const { return Value[SGPR32]; }
|
||||
unsigned getVGRPNum() const { return Value[VGPR32]; }
|
||||
|
||||
unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; }
|
||||
unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
|
||||
|
||||
unsigned getOccupancy(const SISubtarget &ST) const {
|
||||
return std::min(ST.getOccupancyWithNumSGPRs(getSGRPNum()),
|
||||
ST.getOccupancyWithNumVGPRs(getVGRPNum()));
|
||||
}
|
||||
|
||||
void inc(unsigned Reg,
|
||||
LaneBitmask PrevMask,
|
||||
LaneBitmask NewMask,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
bool higherOccupancy(const SISubtarget &ST, const GCNRegPressure& O) const {
|
||||
return getOccupancy(ST) > O.getOccupancy(ST);
|
||||
}
|
||||
|
||||
bool less(const SISubtarget &ST, const GCNRegPressure& O,
|
||||
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
|
||||
|
||||
bool operator==(const GCNRegPressure &O) const {
|
||||
return std::equal(&Value[0], &Value[TOTAL_KINDS], O.Value);
|
||||
}
|
||||
|
||||
bool operator!=(const GCNRegPressure &O) const {
|
||||
return !(*this == O);
|
||||
}
|
||||
|
||||
void print(raw_ostream &OS, const SISubtarget *ST=nullptr) const;
|
||||
void dump() const { print(dbgs()); }
|
||||
|
||||
private:
|
||||
unsigned Value[TOTAL_KINDS];
|
||||
|
||||
static unsigned getRegKind(unsigned Reg, const MachineRegisterInfo &MRI);
|
||||
|
||||
friend GCNRegPressure max(const GCNRegPressure &P1,
|
||||
const GCNRegPressure &P2);
|
||||
};
|
||||
|
||||
inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
|
||||
GCNRegPressure Res;
|
||||
for (unsigned I = 0; I < GCNRegPressure::TOTAL_KINDS; ++I)
|
||||
Res.Value[I] = std::max(P1.Value[I], P2.Value[I]);
|
||||
return Res;
|
||||
}
|
||||
|
||||
class GCNRPTracker {
|
||||
public:
|
||||
typedef DenseMap<unsigned, LaneBitmask> LiveRegSet;
|
||||
|
||||
protected:
|
||||
LiveRegSet LiveRegs;
|
||||
GCNRegPressure CurPressure, MaxPressure;
|
||||
const MachineInstr *LastTrackedMI = nullptr;
|
||||
mutable const MachineRegisterInfo *MRI = nullptr;
|
||||
GCNRPTracker() {}
|
||||
public:
|
||||
// live regs for the current state
|
||||
const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
|
||||
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
|
||||
|
||||
// returns MaxPressure, resetting it
|
||||
decltype(MaxPressure) moveMaxPressure() {
|
||||
auto Res = MaxPressure;
|
||||
MaxPressure.clear();
|
||||
return Res;
|
||||
}
|
||||
decltype(LiveRegs) moveLiveRegs() {
|
||||
return std::move(LiveRegs);
|
||||
}
|
||||
};
|
||||
|
||||
class GCNUpwardRPTracker : public GCNRPTracker {
|
||||
const LiveIntervals &LIS;
|
||||
LaneBitmask getDefRegMask(const MachineOperand &MO) const;
|
||||
LaneBitmask getUsedRegMask(const MachineOperand &MO) const;
|
||||
public:
|
||||
GCNUpwardRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {}
|
||||
// reset tracker to the point just below MI
|
||||
// filling live regs upon this point using LIS
|
||||
void reset(const MachineInstr &MI);
|
||||
|
||||
// move to the state just above the MI
|
||||
void recede(const MachineInstr &MI);
|
||||
|
||||
// checks whether the tracker's state after receding MI corresponds
|
||||
// to reported by LIS
|
||||
bool isValid() const;
|
||||
};
|
||||
|
||||
LaneBitmask getLiveLaneMask(unsigned Reg,
|
||||
SlotIndex SI,
|
||||
const LiveIntervals &LIS,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
|
||||
const LiveIntervals &LIS,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
|
||||
const LiveIntervals &LIS) {
|
||||
return getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
|
||||
MI.getParent()->getParent()->getRegInfo());
|
||||
}
|
||||
|
||||
inline GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI,
|
||||
const LiveIntervals &LIS) {
|
||||
return getLiveRegs(LIS.getInstructionIndex(MI).getBaseIndex(), LIS,
|
||||
MI.getParent()->getParent()->getRegInfo());
|
||||
}
|
||||
|
||||
template <typename Range>
|
||||
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
|
||||
Range &&LiveRegs) {
|
||||
GCNRegPressure Res;
|
||||
for (const auto &RM : LiveRegs)
|
||||
Res.inc(RM.first, LaneBitmask::getNone(), RM.second, MRI);
|
||||
return Res;
|
||||
}
|
||||
|
||||
void printLivesAt(SlotIndex SI,
|
||||
const LiveIntervals &LIS,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
|
|
@@ -45,8 +45,6 @@ void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {

  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);

  if (MF != &DAG->MF)
    TargetOccupancy = 0;
  MF = &DAG->MF;

  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
@@ -531,7 +529,7 @@ void GCNScheduleDAGMILive::finalizeSchedule() {

  Stage++;
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  S.TargetOccupancy = MinOccupancy;
  S.setTargetOccupancy(MinOccupancy);

  MachineBasicBlock *MBB = nullptr;
  for (auto Region : Regions) {
@@ -55,6 +55,8 @@ public:
  SUnit *pickNode(bool &IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
};

class GCNScheduleDAGMILive : public ScheduleDAGMILive {
@@ -1,4 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck %s

; We expect a two digit VGPR usage here, not a three digit.
; CHECK: NumVgprs: {{[0-9][0-9]$}}
@@ -0,0 +1,288 @@
; RUN: llc -march=amdgcn -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s

; SI: NumSgprs: {{[1-9]$}}
; SI: NumVgprs: {{[1-9]$}}

; stores may alias loads
; VI: NumSgprs: {{[1-5][0-9]$}}
; VI: NumVgprs: {{[1-3][0-9]$}}

define void @load_fma_store(float addrspace(3)* nocapture readonly %in_arg, float addrspace(1)* nocapture %out_arg) {
|
||||
bb:
  %adr.a.0 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20004
  %adr.b.0 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20252
  %adr.c.0 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20508
  %adr.a.1 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20772
  %adr.b.1 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 21020
  %adr.c.1 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 21276
  %adr.a.2 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 21540
  %adr.b.2 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 21788
  %adr.c.2 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 22044
  %adr.a.3 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 22308
  %adr.b.3 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 22556
  %adr.c.3 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 22812
  %adr.a.4 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 23076
  %adr.b.4 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 23324
  %adr.c.4 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 23580
  %adr.a.5 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 23844
  %adr.b.5 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 24092
  %adr.c.5 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 24348
  %adr.a.6 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 24612
  %adr.b.6 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 24860
  %adr.c.6 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 25116
  %adr.a.7 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 25380
  %adr.b.7 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 25628
  %adr.c.7 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 25884
  %adr.a.8 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 26148
  %adr.b.8 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 26396
  %adr.c.8 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 26652
  %adr.a.9 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 26916
  %adr.b.9 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 27164
  %adr.c.9 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 27420
  %adr.a.10 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 27684
  %adr.b.10 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 27932
  %adr.c.10 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 28188
  %adr.a.11 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 28452
  %adr.b.11 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 28700
  %adr.c.11 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 28956
  %adr.a.12 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 29220
  %adr.b.12 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 29468
  %adr.c.12 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 29724
  %adr.a.13 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 29988
  %adr.b.13 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 30236
  %adr.c.13 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 30492
  %adr.a.14 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 30756
  %adr.b.14 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 31004
  %adr.c.14 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 31260
  %adr.a.15 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 31524
  %adr.b.15 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 31772
  %adr.c.15 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 32028
  %adr.a.16 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 32292
  %adr.b.16 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 32540
  %adr.c.16 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 32796
  %adr.a.17 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 33060
  %adr.b.17 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 33308
  %adr.c.17 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 33564
  %adr.a.18 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 33828
  %adr.b.18 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 34076
  %adr.c.18 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 34332
  %adr.a.19 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 34596
  %adr.b.19 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 34844
  %adr.c.19 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 35100
  %adr.a.20 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 35364
  %adr.b.20 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 35612
  %adr.c.20 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 35868
  %adr.a.21 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 36132
  %adr.b.21 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 36380
  %adr.c.21 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 36636
  %adr.a.22 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 36900
  %adr.b.22 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 37148
  %adr.c.22 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 37404
  %adr.a.23 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 37668
  %adr.b.23 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 37916
  %adr.c.23 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 38172
  %adr.a.24 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 38436
  %adr.b.24 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 38684
  %adr.c.24 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 38940
  %adr.a.25 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 39204
  %adr.b.25 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 39452
  %adr.c.25 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 39708
  %adr.a.26 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 39972
  %adr.b.26 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 40220
  %adr.c.26 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 40476
  %adr.a.27 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 40740
  %adr.b.27 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 40988
  %adr.c.27 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 41244
  %adr.a.28 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 41508
  %adr.b.28 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 41756
  %adr.c.28 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 42012
  %adr.a.29 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 42276
  %adr.b.29 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 42524
  %adr.c.29 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 42780
  %a.0 = load float, float addrspace(3)* %adr.a.0, align 4
  %b.0 = load float, float addrspace(3)* %adr.b.0, align 4
  %c.0 = load float, float addrspace(3)* %adr.c.0, align 4
  %a.1 = load float, float addrspace(3)* %adr.a.1, align 4
  %b.1 = load float, float addrspace(3)* %adr.b.1, align 4
  %c.1 = load float, float addrspace(3)* %adr.c.1, align 4
  %a.2 = load float, float addrspace(3)* %adr.a.2, align 4
  %b.2 = load float, float addrspace(3)* %adr.b.2, align 4
  %c.2 = load float, float addrspace(3)* %adr.c.2, align 4
  %a.3 = load float, float addrspace(3)* %adr.a.3, align 4
  %b.3 = load float, float addrspace(3)* %adr.b.3, align 4
  %c.3 = load float, float addrspace(3)* %adr.c.3, align 4
  %a.4 = load float, float addrspace(3)* %adr.a.4, align 4
  %b.4 = load float, float addrspace(3)* %adr.b.4, align 4
  %c.4 = load float, float addrspace(3)* %adr.c.4, align 4
  %a.5 = load float, float addrspace(3)* %adr.a.5, align 4
  %b.5 = load float, float addrspace(3)* %adr.b.5, align 4
  %c.5 = load float, float addrspace(3)* %adr.c.5, align 4
  %a.6 = load float, float addrspace(3)* %adr.a.6, align 4
  %b.6 = load float, float addrspace(3)* %adr.b.6, align 4
  %c.6 = load float, float addrspace(3)* %adr.c.6, align 4
  %a.7 = load float, float addrspace(3)* %adr.a.7, align 4
  %b.7 = load float, float addrspace(3)* %adr.b.7, align 4
  %c.7 = load float, float addrspace(3)* %adr.c.7, align 4
  %a.8 = load float, float addrspace(3)* %adr.a.8, align 4
  %b.8 = load float, float addrspace(3)* %adr.b.8, align 4
  %c.8 = load float, float addrspace(3)* %adr.c.8, align 4
  %a.9 = load float, float addrspace(3)* %adr.a.9, align 4
  %b.9 = load float, float addrspace(3)* %adr.b.9, align 4
  %c.9 = load float, float addrspace(3)* %adr.c.9, align 4
  %a.10 = load float, float addrspace(3)* %adr.a.10, align 4
  %b.10 = load float, float addrspace(3)* %adr.b.10, align 4
  %c.10 = load float, float addrspace(3)* %adr.c.10, align 4
  %a.11 = load float, float addrspace(3)* %adr.a.11, align 4
  %b.11 = load float, float addrspace(3)* %adr.b.11, align 4
  %c.11 = load float, float addrspace(3)* %adr.c.11, align 4
  %a.12 = load float, float addrspace(3)* %adr.a.12, align 4
  %b.12 = load float, float addrspace(3)* %adr.b.12, align 4
  %c.12 = load float, float addrspace(3)* %adr.c.12, align 4
  %a.13 = load float, float addrspace(3)* %adr.a.13, align 4
  %b.13 = load float, float addrspace(3)* %adr.b.13, align 4
  %c.13 = load float, float addrspace(3)* %adr.c.13, align 4
  %a.14 = load float, float addrspace(3)* %adr.a.14, align 4
  %b.14 = load float, float addrspace(3)* %adr.b.14, align 4
  %c.14 = load float, float addrspace(3)* %adr.c.14, align 4
  %a.15 = load float, float addrspace(3)* %adr.a.15, align 4
  %b.15 = load float, float addrspace(3)* %adr.b.15, align 4
  %c.15 = load float, float addrspace(3)* %adr.c.15, align 4
  %a.16 = load float, float addrspace(3)* %adr.a.16, align 4
  %b.16 = load float, float addrspace(3)* %adr.b.16, align 4
  %c.16 = load float, float addrspace(3)* %adr.c.16, align 4
  %a.17 = load float, float addrspace(3)* %adr.a.17, align 4
  %b.17 = load float, float addrspace(3)* %adr.b.17, align 4
  %c.17 = load float, float addrspace(3)* %adr.c.17, align 4
  %a.18 = load float, float addrspace(3)* %adr.a.18, align 4
  %b.18 = load float, float addrspace(3)* %adr.b.18, align 4
  %c.18 = load float, float addrspace(3)* %adr.c.18, align 4
  %a.19 = load float, float addrspace(3)* %adr.a.19, align 4
  %b.19 = load float, float addrspace(3)* %adr.b.19, align 4
  %c.19 = load float, float addrspace(3)* %adr.c.19, align 4
  %a.20 = load float, float addrspace(3)* %adr.a.20, align 4
  %b.20 = load float, float addrspace(3)* %adr.b.20, align 4
  %c.20 = load float, float addrspace(3)* %adr.c.20, align 4
  %a.21 = load float, float addrspace(3)* %adr.a.21, align 4
  %b.21 = load float, float addrspace(3)* %adr.b.21, align 4
  %c.21 = load float, float addrspace(3)* %adr.c.21, align 4
  %a.22 = load float, float addrspace(3)* %adr.a.22, align 4
  %b.22 = load float, float addrspace(3)* %adr.b.22, align 4
  %c.22 = load float, float addrspace(3)* %adr.c.22, align 4
  %a.23 = load float, float addrspace(3)* %adr.a.23, align 4
  %b.23 = load float, float addrspace(3)* %adr.b.23, align 4
  %c.23 = load float, float addrspace(3)* %adr.c.23, align 4
  %a.24 = load float, float addrspace(3)* %adr.a.24, align 4
  %b.24 = load float, float addrspace(3)* %adr.b.24, align 4
  %c.24 = load float, float addrspace(3)* %adr.c.24, align 4
  %a.25 = load float, float addrspace(3)* %adr.a.25, align 4
  %b.25 = load float, float addrspace(3)* %adr.b.25, align 4
  %c.25 = load float, float addrspace(3)* %adr.c.25, align 4
  %a.26 = load float, float addrspace(3)* %adr.a.26, align 4
  %b.26 = load float, float addrspace(3)* %adr.b.26, align 4
  %c.26 = load float, float addrspace(3)* %adr.c.26, align 4
  %a.27 = load float, float addrspace(3)* %adr.a.27, align 4
  %b.27 = load float, float addrspace(3)* %adr.b.27, align 4
  %c.27 = load float, float addrspace(3)* %adr.c.27, align 4
  %a.28 = load float, float addrspace(3)* %adr.a.28, align 4
  %b.28 = load float, float addrspace(3)* %adr.b.28, align 4
  %c.28 = load float, float addrspace(3)* %adr.c.28, align 4
  %a.29 = load float, float addrspace(3)* %adr.a.29, align 4
  %b.29 = load float, float addrspace(3)* %adr.b.29, align 4
  %c.29 = load float, float addrspace(3)* %adr.c.29, align 4
  %res.0 = tail call float @llvm.fmuladd.f32(float %a.0, float %b.0, float %c.0)
  %res.1 = tail call float @llvm.fmuladd.f32(float %a.1, float %b.1, float %c.1)
  %res.2 = tail call float @llvm.fmuladd.f32(float %a.2, float %b.2, float %c.2)
  %res.3 = tail call float @llvm.fmuladd.f32(float %a.3, float %b.3, float %c.3)
  %res.4 = tail call float @llvm.fmuladd.f32(float %a.4, float %b.4, float %c.4)
  %res.5 = tail call float @llvm.fmuladd.f32(float %a.5, float %b.5, float %c.5)
  %res.6 = tail call float @llvm.fmuladd.f32(float %a.6, float %b.6, float %c.6)
  %res.7 = tail call float @llvm.fmuladd.f32(float %a.7, float %b.7, float %c.7)
  %res.8 = tail call float @llvm.fmuladd.f32(float %a.8, float %b.8, float %c.8)
  %res.9 = tail call float @llvm.fmuladd.f32(float %a.9, float %b.9, float %c.9)
  %res.10 = tail call float @llvm.fmuladd.f32(float %a.10, float %b.10, float %c.10)
  %res.11 = tail call float @llvm.fmuladd.f32(float %a.11, float %b.11, float %c.11)
  %res.12 = tail call float @llvm.fmuladd.f32(float %a.12, float %b.12, float %c.12)
  %res.13 = tail call float @llvm.fmuladd.f32(float %a.13, float %b.13, float %c.13)
  %res.14 = tail call float @llvm.fmuladd.f32(float %a.14, float %b.14, float %c.14)
  %res.15 = tail call float @llvm.fmuladd.f32(float %a.15, float %b.15, float %c.15)
  %res.16 = tail call float @llvm.fmuladd.f32(float %a.16, float %b.16, float %c.16)
  %res.17 = tail call float @llvm.fmuladd.f32(float %a.17, float %b.17, float %c.17)
  %res.18 = tail call float @llvm.fmuladd.f32(float %a.18, float %b.18, float %c.18)
  %res.19 = tail call float @llvm.fmuladd.f32(float %a.19, float %b.19, float %c.19)
  %res.20 = tail call float @llvm.fmuladd.f32(float %a.20, float %b.20, float %c.20)
  %res.21 = tail call float @llvm.fmuladd.f32(float %a.21, float %b.21, float %c.21)
  %res.22 = tail call float @llvm.fmuladd.f32(float %a.22, float %b.22, float %c.22)
  %res.23 = tail call float @llvm.fmuladd.f32(float %a.23, float %b.23, float %c.23)
  %res.24 = tail call float @llvm.fmuladd.f32(float %a.24, float %b.24, float %c.24)
  %res.25 = tail call float @llvm.fmuladd.f32(float %a.25, float %b.25, float %c.25)
  %res.26 = tail call float @llvm.fmuladd.f32(float %a.26, float %b.26, float %c.26)
  %res.27 = tail call float @llvm.fmuladd.f32(float %a.27, float %b.27, float %c.27)
  %res.28 = tail call float @llvm.fmuladd.f32(float %a.28, float %b.28, float %c.28)
  %res.29 = tail call float @llvm.fmuladd.f32(float %a.29, float %b.29, float %c.29)
  %adr.res.0 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 0
  %adr.res.1 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 2
  %adr.res.2 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 4
  %adr.res.3 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 6
  %adr.res.4 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 8
  %adr.res.5 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 10
  %adr.res.6 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 12
  %adr.res.7 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 14
  %adr.res.8 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 16
  %adr.res.9 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 18
  %adr.res.10 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 20
  %adr.res.11 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 22
  %adr.res.12 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 24
  %adr.res.13 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 26
  %adr.res.14 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 28
  %adr.res.15 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 30
  %adr.res.16 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 32
  %adr.res.17 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 34
  %adr.res.18 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 36
  %adr.res.19 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 38
  %adr.res.20 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 40
  %adr.res.21 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 42
  %adr.res.22 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 44
  %adr.res.23 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 46
  %adr.res.24 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 48
  %adr.res.25 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 50
  %adr.res.26 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 52
  %adr.res.27 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 54
  %adr.res.28 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 56
  %adr.res.29 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 58
  store float %res.0, float addrspace(1)* %adr.res.0, align 4
  store float %res.1, float addrspace(1)* %adr.res.1, align 4
  store float %res.2, float addrspace(1)* %adr.res.2, align 4
  store float %res.3, float addrspace(1)* %adr.res.3, align 4
  store float %res.4, float addrspace(1)* %adr.res.4, align 4
  store float %res.5, float addrspace(1)* %adr.res.5, align 4
  store float %res.6, float addrspace(1)* %adr.res.6, align 4
  store float %res.7, float addrspace(1)* %adr.res.7, align 4
  store float %res.8, float addrspace(1)* %adr.res.8, align 4
  store float %res.9, float addrspace(1)* %adr.res.9, align 4
  store float %res.10, float addrspace(1)* %adr.res.10, align 4
  store float %res.11, float addrspace(1)* %adr.res.11, align 4
  store float %res.12, float addrspace(1)* %adr.res.12, align 4
  store float %res.13, float addrspace(1)* %adr.res.13, align 4
  store float %res.14, float addrspace(1)* %adr.res.14, align 4
  store float %res.15, float addrspace(1)* %adr.res.15, align 4
  store float %res.16, float addrspace(1)* %adr.res.16, align 4
  store float %res.17, float addrspace(1)* %adr.res.17, align 4
  store float %res.18, float addrspace(1)* %adr.res.18, align 4
  store float %res.19, float addrspace(1)* %adr.res.19, align 4
  store float %res.20, float addrspace(1)* %adr.res.20, align 4
  store float %res.21, float addrspace(1)* %adr.res.21, align 4
  store float %res.22, float addrspace(1)* %adr.res.22, align 4
  store float %res.23, float addrspace(1)* %adr.res.23, align 4
  store float %res.24, float addrspace(1)* %adr.res.24, align 4
  store float %res.25, float addrspace(1)* %adr.res.25, align 4
  store float %res.26, float addrspace(1)* %adr.res.26, align 4
  store float %res.27, float addrspace(1)* %adr.res.27, align 4
  store float %res.28, float addrspace(1)* %adr.res.28, align 4
  store float %res.29, float addrspace(1)* %adr.res.29, align 4
  ret void
}
declare float @llvm.fmuladd.f32(float, float, float) #0
attributes #0 = { nounwind readnone }
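
; Every unrolled iteration of @load_fma_store above follows the same
; five-instruction shape; as a readability sketch only (generic value names,
; not an additional FileCheck pattern):
;   %a   = load float, float addrspace(3)* %adr.a, align 4
;   %b   = load float, float addrspace(3)* %adr.b, align 4
;   %c   = load float, float addrspace(3)* %adr.c, align 4
;   %res = tail call float @llvm.fmuladd.f32(float %a, float %b, float %c)
;   store float %res, float addrspace(1)* %adr.res, align 4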