forked from OSchip/llvm-project
[llvm-mca] Delay calculation of Cycles per Resources, separate the cycles and resource quantities.
Summary: This patch stops storing accumulated floating-point data within the llvm-mca library. It splits up the two quantities involved: the number of cycles and the number of resource units. By keeping these two quantities separate, we delay the calculation of "cycles per resource unit" until that value is read, reducing the chance of accumulating floating-point error. I considered using APFloat, but after measuring performance on a large (many-iteration) sample, I decided to go with this faster solution. Reviewers: andreadb, courbet, RKSimon Reviewed By: andreadb Subscribers: llvm-commits, javed.absar, tschuett, gbedwell Differential Revision: https://reviews.llvm.org/D51903 llvm-svn: 341980
This commit is contained in:
parent
44c1b3a331
commit
db834837c2
|
@ -46,7 +46,8 @@ void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
|
|||
return;
|
||||
const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event);
|
||||
const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size();
|
||||
for (const std::pair<ResourceRef, double> &Use : IssueEvent.UsedResources) {
|
||||
for (const std::pair<ResourceRef, ResourceCycles> &Use :
|
||||
IssueEvent.UsedResources) {
|
||||
const ResourceRef &RR = Use.first;
|
||||
assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end());
|
||||
unsigned R2VIndex = Resource2VecIndex[RR.first];
|
||||
|
|
|
@ -79,7 +79,7 @@ class ResourcePressureView : public View {
|
|||
llvm::DenseMap<unsigned, unsigned> Resource2VecIndex;
|
||||
|
||||
// Table of resources used by instructions.
|
||||
std::vector<double> ResourceUsage;
|
||||
std::vector<ResourceCycles> ResourceUsage;
|
||||
unsigned NumResourceUnits;
|
||||
|
||||
const llvm::MCInst &GetMCInstFromIndex(unsigned Index) const;
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H
|
||||
|
||||
#include "Instruction.h"
|
||||
#include "Support.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include <utility>
|
||||
|
||||
namespace mca {
|
||||
|
||||
|
@ -61,11 +61,12 @@ public:
|
|||
class HWInstructionIssuedEvent : public HWInstructionEvent {
|
||||
public:
|
||||
using ResourceRef = std::pair<uint64_t, uint64_t>;
|
||||
HWInstructionIssuedEvent(const InstRef &IR,
|
||||
llvm::ArrayRef<std::pair<ResourceRef, double>> UR)
|
||||
HWInstructionIssuedEvent(
|
||||
const InstRef &IR,
|
||||
llvm::ArrayRef<std::pair<ResourceRef, ResourceCycles>> UR)
|
||||
: HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {}
|
||||
|
||||
llvm::ArrayRef<std::pair<ResourceRef, double>> UsedResources;
|
||||
llvm::ArrayRef<std::pair<ResourceRef, ResourceCycles>> UsedResources;
|
||||
};
|
||||
|
||||
class HWInstructionDispatchedEvent : public HWInstructionEvent {
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#define LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H
|
||||
|
||||
#include "Instruction.h"
|
||||
#include "Support.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
@ -344,7 +345,7 @@ public:
|
|||
|
||||
void issueInstruction(
|
||||
const InstrDesc &Desc,
|
||||
llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
|
||||
llvm::SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
|
||||
|
||||
void cycleEvent(llvm::SmallVectorImpl<ResourceRef> &ResourcesFreed);
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "HardwareUnits/HardwareUnit.h"
|
||||
#include "HardwareUnits/LSUnit.h"
|
||||
#include "ResourceManager.h"
|
||||
#include "Support.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/MC/MCSchedule.h"
|
||||
|
||||
|
@ -103,7 +104,7 @@ class Scheduler : public HardwareUnit {
|
|||
/// Issue an instruction without updating the ready queue.
|
||||
void issueInstructionImpl(
|
||||
InstRef &IR,
|
||||
llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
|
||||
llvm::SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
|
||||
|
||||
// Identify instructions that have finished executing, and remove them from
|
||||
// the IssuedSet. References to executed instructions are added to input
|
||||
|
@ -164,10 +165,10 @@ public:
|
|||
/// Issues an instruction and populates a vector of used pipeline resources,
|
||||
/// and a vector of instructions that transitioned to the ready state as a
|
||||
/// result of this event.
|
||||
void
|
||||
issueInstruction(InstRef &IR,
|
||||
llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Used,
|
||||
llvm::SmallVectorImpl<InstRef> &Ready);
|
||||
void issueInstruction(
|
||||
InstRef &IR,
|
||||
llvm::SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Used,
|
||||
llvm::SmallVectorImpl<InstRef> &Ready);
|
||||
|
||||
/// Returns true if IR has to be issued immediately, or if IR is a zero
|
||||
/// latency instruction.
|
||||
|
|
|
@ -59,9 +59,9 @@ public:
|
|||
llvm::Error cycleStart() override;
|
||||
llvm::Error execute(InstRef &IR) override;
|
||||
|
||||
void
|
||||
notifyInstructionIssued(const InstRef &IR,
|
||||
llvm::ArrayRef<std::pair<ResourceRef, double>> Used);
|
||||
void notifyInstructionIssued(
|
||||
const InstRef &IR,
|
||||
llvm::ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used);
|
||||
void notifyInstructionExecuted(const InstRef &IR);
|
||||
void notifyInstructionReady(const InstRef &IR);
|
||||
void notifyResourceAvailable(const ResourceRef &RR);
|
||||
|
|
|
@ -28,7 +28,7 @@ namespace mca {
|
|||
class InstructionTables final : public Stage {
|
||||
const llvm::MCSchedModel &SM;
|
||||
InstrBuilder &IB;
|
||||
llvm::SmallVector<std::pair<ResourceRef, double>, 4> UsedResources;
|
||||
llvm::SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
|
||||
|
||||
public:
|
||||
InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder)
|
||||
|
|
|
@ -21,6 +21,46 @@
|
|||
|
||||
namespace mca {
|
||||
|
||||
/// This class represents the number of cycles per resource (fractions of
|
||||
/// cycles). That quantity is managed here as a ratio, and accessed via the
|
||||
/// double cast-operator below. The two quantities, number of cycles and
|
||||
/// number of resources, are kept separate. This is used by the
|
||||
/// ResourcePressureView to calculate the average resource cycles
|
||||
/// per instruction/iteration.
|
||||
class ResourceCycles {
|
||||
unsigned Numerator, Denominator;
|
||||
|
||||
public:
|
||||
ResourceCycles() : Numerator(0), Denominator(1) {}
|
||||
ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1)
|
||||
: Numerator(Cycles), Denominator(ResourceUnits) {}
|
||||
|
||||
operator double() const {
|
||||
assert(Denominator && "Invalid denominator (must be non-zero).");
|
||||
return (Denominator == 1) ? Numerator : (double)Numerator / Denominator;
|
||||
}
|
||||
|
||||
// Add the components of RHS to this instance. Instead of calculating
|
||||
// the final value here, we keep track of the numerator and denominator
|
||||
// separately, to reduce floating point error.
|
||||
ResourceCycles &operator+=(const ResourceCycles &RHS) {
|
||||
if (Denominator == RHS.Denominator)
|
||||
Numerator += RHS.Numerator;
|
||||
else {
|
||||
// Create a common denominator for LHS and RHS by calculating the least
|
||||
// common multiple from the GCD.
|
||||
unsigned GCD =
|
||||
llvm::GreatestCommonDivisor64(Denominator, RHS.Denominator);
|
||||
unsigned LCM = (Denominator * RHS.Denominator) / GCD;
|
||||
unsigned LHSNumerator = Numerator * (LCM / Denominator);
|
||||
unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
|
||||
Numerator = LHSNumerator + RHSNumerator;
|
||||
Denominator = LCM;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
/// Populates vector Masks with processor resource masks.
|
||||
///
|
||||
/// The number of bits set in a mask depends on the processor resource type.
|
||||
|
|
|
@ -247,7 +247,7 @@ bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const {
|
|||
|
||||
void ResourceManager::issueInstruction(
|
||||
const InstrDesc &Desc,
|
||||
SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes) {
|
||||
SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) {
|
||||
for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
|
||||
const CycleSegment &CS = R.second.CS;
|
||||
if (!CS.size()) {
|
||||
|
@ -263,8 +263,8 @@ void ResourceManager::issueInstruction(
|
|||
// Replace the resource mask with a valid processor resource index.
|
||||
const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)];
|
||||
Pipe.first = RS.getProcResourceID();
|
||||
Pipes.emplace_back(
|
||||
std::pair<ResourceRef, double>(Pipe, static_cast<double>(CS.size())));
|
||||
Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
|
||||
Pipe, ResourceCycles(CS.size())));
|
||||
} else {
|
||||
assert((countPopulation(R.first) > 1) && "Expected a group!");
|
||||
// Mark this group as reserved.
|
||||
|
|
|
@ -66,7 +66,7 @@ Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
|
|||
|
||||
void Scheduler::issueInstructionImpl(
|
||||
InstRef &IR,
|
||||
SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources) {
|
||||
SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) {
|
||||
Instruction *IS = IR.getInstruction();
|
||||
const InstrDesc &D = IS->getDesc();
|
||||
|
||||
|
@ -86,7 +86,8 @@ void Scheduler::issueInstructionImpl(
|
|||
|
||||
// Release the buffered resources and issue the instruction.
|
||||
void Scheduler::issueInstruction(
|
||||
InstRef &IR, SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources,
|
||||
InstRef &IR,
|
||||
SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
|
||||
SmallVectorImpl<InstRef> &ReadyInstructions) {
|
||||
const Instruction &Inst = *IR.getInstruction();
|
||||
bool HasDependentUsers = Inst.hasDependentUsers();
|
||||
|
|
|
@ -53,11 +53,11 @@ bool ExecuteStage::isAvailable(const InstRef &IR) const {
|
|||
}
|
||||
|
||||
Error ExecuteStage::issueInstruction(InstRef &IR) {
|
||||
SmallVector<std::pair<ResourceRef, double>, 4> Used;
|
||||
SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used;
|
||||
SmallVector<InstRef, 4> Ready;
|
||||
HWS.issueInstruction(IR, Used, Ready);
|
||||
|
||||
notifyReservedOrReleasedBuffers(IR, /* Reserved */false);
|
||||
notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
|
||||
notifyInstructionIssued(IR, Used);
|
||||
if (IR.getInstruction()->isExecuted()) {
|
||||
notifyInstructionExecuted(IR);
|
||||
|
@ -120,7 +120,7 @@ Error ExecuteStage::execute(InstRef &IR) {
|
|||
// be released after MCIS is issued, and all the ResourceCycles for those
|
||||
// units have been consumed.
|
||||
HWS.dispatch(IR);
|
||||
notifyReservedOrReleasedBuffers(IR, /* Reserved */true);
|
||||
notifyReservedOrReleasedBuffers(IR, /* Reserved */ true);
|
||||
if (!HWS.isReady(IR))
|
||||
return ErrorSuccess();
|
||||
|
||||
|
@ -156,10 +156,10 @@ void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) {
|
|||
}
|
||||
|
||||
void ExecuteStage::notifyInstructionIssued(
|
||||
const InstRef &IR, ArrayRef<std::pair<ResourceRef, double>> Used) {
|
||||
const InstRef &IR, ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) {
|
||||
LLVM_DEBUG({
|
||||
dbgs() << "[E] Instruction Issued: #" << IR << '\n';
|
||||
for (const std::pair<ResourceRef, unsigned> &Resource : Used) {
|
||||
for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
|
||||
dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
|
||||
<< Resource.first.second << "], ";
|
||||
dbgs() << "cycles: " << Resource.second << '\n';
|
||||
|
|
|
@ -31,17 +31,17 @@ Error InstructionTables::execute(InstRef &IR) {
|
|||
// Skip zero-cycle resources (i.e., unused resources).
|
||||
if (!Resource.second.size())
|
||||
continue;
|
||||
double Cycles = static_cast<double>(Resource.second.size());
|
||||
unsigned Cycles = Resource.second.size();
|
||||
unsigned Index = std::distance(
|
||||
Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first));
|
||||
const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index);
|
||||
unsigned NumUnits = ProcResource.NumUnits;
|
||||
if (!ProcResource.SubUnitsIdxBegin) {
|
||||
// The number of cycles consumed by each unit.
|
||||
Cycles /= NumUnits;
|
||||
for (unsigned I = 0, E = NumUnits; I < E; ++I) {
|
||||
ResourceRef ResourceUnit = std::make_pair(Index, 1U << I);
|
||||
UsedResources.emplace_back(std::make_pair(ResourceUnit, Cycles));
|
||||
UsedResources.emplace_back(
|
||||
std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits)));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -53,10 +53,10 @@ Error InstructionTables::execute(InstRef &IR) {
|
|||
unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1];
|
||||
const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx);
|
||||
// Compute the number of cycles consumed by each resource unit.
|
||||
double RUCycles = Cycles / (NumUnits * SubUnit.NumUnits);
|
||||
for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) {
|
||||
ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2);
|
||||
UsedResources.emplace_back(std::make_pair(ResourceUnit, RUCycles));
|
||||
UsedResources.emplace_back(std::make_pair(
|
||||
ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue