forked from OSchip/llvm-project
[MCA] Correctly check pipeline availability for partially overlapping resource groups.
This patch mostly reverts commit 70b37f4c03
which fixed PR50725.
In case of explicit consumption of multiple partially overlapping group
resources, the ResourceManager was not correctly checking pipeline
resources availability.
The fix for PR50725 only partially addressed a few instances of that issue.
This is a more general (although, technically slower) fix for that same issue.
It also fixes Issue #57548.
Thanks to Haohai Wen for the small reproducer.
This commit is contained in:
parent
8019b46bc7
commit
3262794804
llvm
include/llvm/MCA
lib/MCA
test/tools/llvm-mca/X86/AlderlakeP
|
@ -458,9 +458,6 @@ struct InstrDesc {
|
|||
// A bitmask of used processor resource units.
|
||||
uint64_t UsedProcResUnits;
|
||||
|
||||
// A bitmask of implicit uses of processor resource units.
|
||||
uint64_t ImplicitlyUsedProcResUnits;
|
||||
|
||||
// A bitmask of used processor resource groups.
|
||||
uint64_t UsedProcResGroups;
|
||||
|
||||
|
@ -481,6 +478,9 @@ struct InstrDesc {
|
|||
// recycled.
|
||||
unsigned IsRecyclable : 1;
|
||||
|
||||
// True if some of the consumed group resources are partially overlapping.
|
||||
unsigned HasPartiallyOverlappingGroups : 1;
|
||||
|
||||
// A zero latency instruction doesn't consume any scheduler resources.
|
||||
bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
|
||||
|
||||
|
|
|
@ -281,26 +281,67 @@ void ResourceManager::releaseBuffers(uint64_t ConsumedBuffers) {
|
|||
|
||||
uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
|
||||
uint64_t BusyResourceMask = 0;
|
||||
uint64_t ConsumedResourceMask = 0;
|
||||
DenseMap<uint64_t, unsigned> AvailableUnits;
|
||||
|
||||
for (const std::pair<uint64_t, ResourceUsage> &E : Desc.Resources) {
|
||||
unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
|
||||
unsigned Index = getResourceStateIndex(E.first);
|
||||
if (!Resources[Index]->isReady(NumUnits))
|
||||
const ResourceState &RS = *Resources[getResourceStateIndex(E.first)];
|
||||
if (!RS.isReady(NumUnits)) {
|
||||
BusyResourceMask |= E.first;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t ImplicitUses = Desc.ImplicitlyUsedProcResUnits;
|
||||
while (ImplicitUses) {
|
||||
uint64_t Use = ImplicitUses & -ImplicitUses;
|
||||
ImplicitUses ^= Use;
|
||||
unsigned Index = getResourceStateIndex(Use);
|
||||
if (!Resources[Index]->isReady(/* NumUnits */ 1))
|
||||
BusyResourceMask |= Index;
|
||||
if (Desc.HasPartiallyOverlappingGroups && !RS.isAResourceGroup()) {
|
||||
unsigned NumAvailableUnits = countPopulation(RS.getReadyMask());
|
||||
NumAvailableUnits -= NumUnits;
|
||||
AvailableUnits[E.first] = NumAvailableUnits;
|
||||
if (!NumAvailableUnits)
|
||||
ConsumedResourceMask |= E.first;
|
||||
}
|
||||
}
|
||||
|
||||
BusyResourceMask &= ProcResUnitMask;
|
||||
if (BusyResourceMask)
|
||||
return BusyResourceMask;
|
||||
return Desc.UsedProcResGroups & ReservedResourceGroups;
|
||||
|
||||
BusyResourceMask = Desc.UsedProcResGroups & ReservedResourceGroups;
|
||||
if (!Desc.HasPartiallyOverlappingGroups || BusyResourceMask)
|
||||
return BusyResourceMask;
|
||||
|
||||
// If this instruction has overlapping groups, make sure that we can
|
||||
// select at least one unit per group.
|
||||
for (const std::pair<uint64_t, ResourceUsage> &E : Desc.Resources) {
|
||||
const ResourceState &RS = *Resources[getResourceStateIndex(E.first)];
|
||||
if (!E.second.isReserved() && RS.isAResourceGroup()) {
|
||||
uint64_t ReadyMask = RS.getReadyMask() & ~ConsumedResourceMask;
|
||||
if (!ReadyMask) {
|
||||
BusyResourceMask |= RS.getReadyMask();
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t ResourceMask = PowerOf2Floor(ReadyMask);
|
||||
|
||||
auto it = AvailableUnits.find(ResourceMask);
|
||||
if (it == AvailableUnits.end()) {
|
||||
unsigned Index = getResourceStateIndex(ResourceMask);
|
||||
unsigned NumUnits = countPopulation(Resources[Index]->getReadyMask());
|
||||
it =
|
||||
AvailableUnits.insert(std::make_pair(ResourceMask, NumUnits)).first;
|
||||
}
|
||||
|
||||
if (!it->second) {
|
||||
BusyResourceMask |= it->first;
|
||||
continue;
|
||||
}
|
||||
|
||||
it->second--;
|
||||
if (!it->second)
|
||||
ConsumedResourceMask |= it->first;
|
||||
}
|
||||
}
|
||||
|
||||
return BusyResourceMask;
|
||||
}
|
||||
|
||||
void ResourceManager::issueInstruction(
|
||||
|
|
|
@ -112,13 +112,12 @@ static void initializeUsedResources(InstrDesc &ID,
|
|||
|
||||
uint64_t UsedResourceUnits = 0;
|
||||
uint64_t UsedResourceGroups = 0;
|
||||
auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
|
||||
return countPopulation(Elt.first) > 1;
|
||||
});
|
||||
unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
|
||||
uint64_t ImpliedUsesOfResourceUnits = 0;
|
||||
uint64_t UnitsFromResourceGroups = 0;
|
||||
|
||||
// Remove cycles contributed by smaller resources, and check if there
|
||||
// are partially overlapping resource groups.
|
||||
ID.HasPartiallyOverlappingGroups = false;
|
||||
|
||||
// Remove cycles contributed by smaller resources.
|
||||
for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
|
||||
ResourcePlusCycles &A = Worklist[I];
|
||||
if (!A.second.size()) {
|
||||
|
@ -129,21 +128,17 @@ static void initializeUsedResources(InstrDesc &ID,
|
|||
|
||||
ID.Resources.emplace_back(A);
|
||||
uint64_t NormalizedMask = A.first;
|
||||
|
||||
if (countPopulation(A.first) == 1) {
|
||||
UsedResourceUnits |= A.first;
|
||||
} else {
|
||||
// Remove the leading 1 from the resource group mask.
|
||||
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
|
||||
UsedResourceGroups |= (A.first ^ NormalizedMask);
|
||||
if (UnitsFromResourceGroups & NormalizedMask)
|
||||
ID.HasPartiallyOverlappingGroups = true;
|
||||
|
||||
uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
|
||||
if ((NormalizedMask != AvailableMask) &&
|
||||
countPopulation(AvailableMask) == 1) {
|
||||
// At simulation time, this resource group use will decay into a simple
|
||||
// use of the resource unit identified by `AvailableMask`.
|
||||
ImpliedUsesOfResourceUnits |= AvailableMask;
|
||||
UsedResourceUnits |= AvailableMask;
|
||||
}
|
||||
UnitsFromResourceGroups |= NormalizedMask;
|
||||
UsedResourceGroups |= (A.first ^ NormalizedMask);
|
||||
}
|
||||
|
||||
for (unsigned J = I + 1; J < E; ++J) {
|
||||
|
@ -156,31 +151,6 @@ static void initializeUsedResources(InstrDesc &ID,
|
|||
}
|
||||
}
|
||||
|
||||
// Look for implicit uses of processor resource units. These are resource
|
||||
// units which are indirectly consumed by resource groups, and that must be
|
||||
// always available on instruction issue.
|
||||
while (ImpliedUsesOfResourceUnits) {
|
||||
ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
|
||||
ImpliedUsesOfResourceUnits = 0;
|
||||
for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
|
||||
ResourcePlusCycles &A = Worklist[I];
|
||||
if (!A.second.size())
|
||||
continue;
|
||||
|
||||
uint64_t NormalizedMask = A.first;
|
||||
assert(countPopulation(NormalizedMask) > 1);
|
||||
// Remove the leading 1 from the resource group mask.
|
||||
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
|
||||
uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
|
||||
if ((NormalizedMask != AvailableMask) &&
|
||||
countPopulation(AvailableMask) != 1)
|
||||
continue;
|
||||
|
||||
UsedResourceUnits |= AvailableMask;
|
||||
ImpliedUsesOfResourceUnits |= AvailableMask;
|
||||
}
|
||||
}
|
||||
|
||||
// A SchedWrite may specify a number of cycles in which a resource group
|
||||
// is reserved. For example (on target x86; cpu Haswell):
|
||||
//
|
||||
|
@ -240,10 +210,10 @@ static void initializeUsedResources(InstrDesc &ID,
|
|||
BufferIDs ^= Current;
|
||||
}
|
||||
dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
|
||||
dbgs() << "\t\tImplicitly Used Units="
|
||||
<< format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
|
||||
dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
|
||||
<< '\n';
|
||||
dbgs() << "\t\tHasPartiallyOverlappingGroups="
|
||||
<< ID.HasPartiallyOverlappingGroups << '\n';
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=alderlake -all-views=false -summary-view < %s | FileCheck %s
|
||||
|
||||
# Issue #57548
|
||||
|
||||
# Do not crash when simulating instructions that consume partially overlapping
|
||||
# resource groups.
|
||||
|
||||
vpsllw %xmm1, %ymm0, %ymm0
|
||||
vpsllw %xmm1, %xmm2, %xmm1
|
||||
vpand %ymm1, %ymm0, %ymm0
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 300
|
||||
# CHECK-NEXT: Total Cycles: 503
|
||||
# CHECK-NEXT: Total uOps: 500
|
||||
|
||||
# CHECK: Dispatch Width: 6
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.99
|
||||
# CHECK-NEXT: IPC: 0.60
|
||||
# CHECK-NEXT: Block RThroughput: 1.0
|
Loading…
Reference in New Issue