[llvm-mca] Delay calculation of Cycles per Resources, separate the cycles and resource quantities.

Summary:
This patch stops storing accumulated floating-point data
within the llvm-mca library.

This patch splits up the two quantities: cycles and number of resource units.
By splitting up these two quantities, we delay the calculation of "cycles per resource unit"
until that value is read, reducing the chance of accumulating floating-point error.

I considered using APFloat, but after measuring performance on a large (many-iteration)
sample, I decided to go with this faster solution.

Reviewers: andreadb, courbet, RKSimon

Reviewed By: andreadb

Subscribers: llvm-commits, javed.absar, tschuett, gbedwell

Differential Revision: https://reviews.llvm.org/D51903

llvm-svn: 341980
This commit is contained in:
Matt Davis 2018-09-11 18:47:48 +00:00
parent 44c1b3a331
commit db834837c2
12 changed files with 76 additions and 31 deletions

View File

@ -46,7 +46,8 @@ void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
return;
const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event);
const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size();
for (const std::pair<ResourceRef, double> &Use : IssueEvent.UsedResources) {
for (const std::pair<ResourceRef, ResourceCycles> &Use :
IssueEvent.UsedResources) {
const ResourceRef &RR = Use.first;
assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end());
unsigned R2VIndex = Resource2VecIndex[RR.first];

View File

@ -79,7 +79,7 @@ class ResourcePressureView : public View {
llvm::DenseMap<unsigned, unsigned> Resource2VecIndex;
// Table of resources used by instructions.
std::vector<double> ResourceUsage;
std::vector<ResourceCycles> ResourceUsage;
unsigned NumResourceUnits;
const llvm::MCInst &GetMCInstFromIndex(unsigned Index) const;

View File

@ -16,8 +16,8 @@
#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H
#include "Instruction.h"
#include "Support.h"
#include "llvm/ADT/ArrayRef.h"
#include <utility>
namespace mca {
@ -61,11 +61,12 @@ public:
class HWInstructionIssuedEvent : public HWInstructionEvent {
public:
using ResourceRef = std::pair<uint64_t, uint64_t>;
HWInstructionIssuedEvent(const InstRef &IR,
llvm::ArrayRef<std::pair<ResourceRef, double>> UR)
HWInstructionIssuedEvent(
const InstRef &IR,
llvm::ArrayRef<std::pair<ResourceRef, ResourceCycles>> UR)
: HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {}
llvm::ArrayRef<std::pair<ResourceRef, double>> UsedResources;
llvm::ArrayRef<std::pair<ResourceRef, ResourceCycles>> UsedResources;
};
class HWInstructionDispatchedEvent : public HWInstructionEvent {

View File

@ -17,6 +17,7 @@
#define LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H
#include "Instruction.h"
#include "Support.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@ -344,7 +345,7 @@ public:
void issueInstruction(
const InstrDesc &Desc,
llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
llvm::SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
void cycleEvent(llvm::SmallVectorImpl<ResourceRef> &ResourcesFreed);

View File

@ -18,6 +18,7 @@
#include "HardwareUnits/HardwareUnit.h"
#include "HardwareUnits/LSUnit.h"
#include "ResourceManager.h"
#include "Support.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
@ -103,7 +104,7 @@ class Scheduler : public HardwareUnit {
/// Issue an instruction without updating the ready queue.
void issueInstructionImpl(
InstRef &IR,
llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes);
llvm::SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes);
// Identify instructions that have finished executing, and remove them from
// the IssuedSet. References to executed instructions are added to input
@ -164,9 +165,9 @@ public:
/// Issue an instruction and populates a vector of used pipeline resources,
/// and a vector of instructions that transitioned to the ready state as a
/// result of this event.
void
issueInstruction(InstRef &IR,
llvm::SmallVectorImpl<std::pair<ResourceRef, double>> &Used,
void issueInstruction(
InstRef &IR,
llvm::SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Used,
llvm::SmallVectorImpl<InstRef> &Ready);
/// Returns true if IR has to be issued immediately, or if IR is a zero

View File

@ -59,9 +59,9 @@ public:
llvm::Error cycleStart() override;
llvm::Error execute(InstRef &IR) override;
void
notifyInstructionIssued(const InstRef &IR,
llvm::ArrayRef<std::pair<ResourceRef, double>> Used);
void notifyInstructionIssued(
const InstRef &IR,
llvm::ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used);
void notifyInstructionExecuted(const InstRef &IR);
void notifyInstructionReady(const InstRef &IR);
void notifyResourceAvailable(const ResourceRef &RR);

View File

@ -28,7 +28,7 @@ namespace mca {
class InstructionTables final : public Stage {
const llvm::MCSchedModel &SM;
InstrBuilder &IB;
llvm::SmallVector<std::pair<ResourceRef, double>, 4> UsedResources;
llvm::SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
public:
InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder)

View File

@ -21,6 +21,46 @@
namespace mca {
/// This class represents the number of cycles per resource (fractions of
/// cycles). That quantity is managed here as a ratio, and accessed via the
/// double cast-operator below. The two quantities, number of cycles and
/// number of resources, are kept separate. This is used by the
/// ResourcePressureView to calculate the average resource cycles
/// per instruction/iteration.
class ResourceCycles {
  // The ratio Numerator / Denominator: cycles over resource units.
  unsigned Numerator, Denominator;

public:
  /// Constructs a ratio of zero cycles over one resource unit.
  ResourceCycles() : Numerator(0), Denominator(1) {}

  /// Constructs the ratio Cycles / ResourceUnits. The division is not
  /// performed here; it is delayed until the value is read as a double.
  ResourceCycles(unsigned Cycles, unsigned ResourceUnits = 1)
      : Numerator(Cycles), Denominator(ResourceUnits) {}

  /// Returns the ratio as a floating point value. The denominator must be
  /// non-zero.
  operator double() const {
    assert(Denominator && "Invalid denominator (must be non-zero).");
    return (Denominator == 1) ? Numerator
                              : static_cast<double>(Numerator) / Denominator;
  }

  /// Add the components of RHS to this instance. Instead of calculating
  /// the final value here, we keep track of the numerator and denominator
  /// separately, to reduce floating point error.
  ///
  /// \returns a reference to this instance, which now holds the sum.
  ResourceCycles &operator+=(const ResourceCycles &RHS) {
    // Same denominator: the numerators can be added directly.
    if (Denominator == RHS.Denominator) {
      Numerator += RHS.Numerator;
      return *this;
    }

    assert(Denominator && RHS.Denominator &&
           "Invalid denominator (must be non-zero).");

    // Bring both ratios to their least common denominator
    //   LCM = (Denominator / GCD) * RHS.Denominator.
    // Each side is scaled by the *other* denominator divided by the GCD.
    // Dividing before multiplying keeps the intermediate values small, so the
    // product Denominator * RHS.Denominator is never formed and cannot wrap
    // when the LCM itself is representable.
    const unsigned GCD = static_cast<unsigned>(
        llvm::GreatestCommonDivisor64(Denominator, RHS.Denominator));
    const unsigned LHSScale = RHS.Denominator / GCD; // == LCM / Denominator
    const unsigned RHSScale = Denominator / GCD; // == LCM / RHS.Denominator

    // NOTE: the scaled numerators and the LCM are still plain unsigned values
    // and wrap around if the true result does not fit.
    Numerator = Numerator * LHSScale + RHS.Numerator * RHSScale;
    Denominator *= LHSScale;
    return *this;
  }
};
/// Populates vector Masks with processor resource masks.
///
/// The number of bits set in a mask depends on the processor resource type.

View File

@ -247,7 +247,7 @@ bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const {
void ResourceManager::issueInstruction(
const InstrDesc &Desc,
SmallVectorImpl<std::pair<ResourceRef, double>> &Pipes) {
SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes) {
for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
const CycleSegment &CS = R.second.CS;
if (!CS.size()) {
@ -263,8 +263,8 @@ void ResourceManager::issueInstruction(
// Replace the resource mask with a valid processor resource index.
const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)];
Pipe.first = RS.getProcResourceID();
Pipes.emplace_back(
std::pair<ResourceRef, double>(Pipe, static_cast<double>(CS.size())));
Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
Pipe, ResourceCycles(CS.size())));
} else {
assert((countPopulation(R.first) > 1) && "Expected a group!");
// Mark this group as reserved.

View File

@ -66,7 +66,7 @@ Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
void Scheduler::issueInstructionImpl(
InstRef &IR,
SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources) {
SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources) {
Instruction *IS = IR.getInstruction();
const InstrDesc &D = IS->getDesc();
@ -86,7 +86,8 @@ void Scheduler::issueInstructionImpl(
// Release the buffered resources and issue the instruction.
void Scheduler::issueInstruction(
InstRef &IR, SmallVectorImpl<std::pair<ResourceRef, double>> &UsedResources,
InstRef &IR,
SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
SmallVectorImpl<InstRef> &ReadyInstructions) {
const Instruction &Inst = *IR.getInstruction();
bool HasDependentUsers = Inst.hasDependentUsers();

View File

@ -53,11 +53,11 @@ bool ExecuteStage::isAvailable(const InstRef &IR) const {
}
Error ExecuteStage::issueInstruction(InstRef &IR) {
SmallVector<std::pair<ResourceRef, double>, 4> Used;
SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used;
SmallVector<InstRef, 4> Ready;
HWS.issueInstruction(IR, Used, Ready);
notifyReservedOrReleasedBuffers(IR, /* Reserved */false);
notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
notifyInstructionIssued(IR, Used);
if (IR.getInstruction()->isExecuted()) {
notifyInstructionExecuted(IR);
@ -120,7 +120,7 @@ Error ExecuteStage::execute(InstRef &IR) {
// be released after MCIS is issued, and all the ResourceCycles for those
// units have been consumed.
HWS.dispatch(IR);
notifyReservedOrReleasedBuffers(IR, /* Reserved */true);
notifyReservedOrReleasedBuffers(IR, /* Reserved */ true);
if (!HWS.isReady(IR))
return ErrorSuccess();
@ -156,10 +156,10 @@ void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) {
}
void ExecuteStage::notifyInstructionIssued(
const InstRef &IR, ArrayRef<std::pair<ResourceRef, double>> Used) {
const InstRef &IR, ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) {
LLVM_DEBUG({
dbgs() << "[E] Instruction Issued: #" << IR << '\n';
for (const std::pair<ResourceRef, unsigned> &Resource : Used) {
for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
<< Resource.first.second << "], ";
dbgs() << "cycles: " << Resource.second << '\n';

View File

@ -31,17 +31,17 @@ Error InstructionTables::execute(InstRef &IR) {
// Skip zero-cycle resources (i.e., unused resources).
if (!Resource.second.size())
continue;
double Cycles = static_cast<double>(Resource.second.size());
unsigned Cycles = Resource.second.size();
unsigned Index = std::distance(
Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first));
const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index);
unsigned NumUnits = ProcResource.NumUnits;
if (!ProcResource.SubUnitsIdxBegin) {
// The number of cycles consumed by each unit.
Cycles /= NumUnits;
for (unsigned I = 0, E = NumUnits; I < E; ++I) {
ResourceRef ResourceUnit = std::make_pair(Index, 1U << I);
UsedResources.emplace_back(std::make_pair(ResourceUnit, Cycles));
UsedResources.emplace_back(
std::make_pair(ResourceUnit, ResourceCycles(Cycles, NumUnits)));
}
continue;
}
@ -53,10 +53,10 @@ Error InstructionTables::execute(InstRef &IR) {
unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1];
const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx);
// Compute the number of cycles consumed by each resource unit.
double RUCycles = Cycles / (NumUnits * SubUnit.NumUnits);
for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) {
ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2);
UsedResources.emplace_back(std::make_pair(ResourceUnit, RUCycles));
UsedResources.emplace_back(std::make_pair(
ResourceUnit, ResourceCycles(Cycles, NumUnits * SubUnit.NumUnits)));
}
}
}