Recommit [TableGen][SchedModels] Fix read/write variant substitution

Original commit rG112b3cb6ba49 introduced non-determinism in subtarget
generator due to iteration over DenseMap. New patch fixes this changing
ProcModelMapTy from DenseMap to std::map.
This commit is contained in:
Evgeny Leviant 2020-12-04 21:50:34 +03:00
parent c55d9af8c0
commit 993eaf2d69
4 changed files with 83 additions and 77 deletions

View File

@ -183,11 +183,6 @@ class A57BranchForm<SchedWriteRes non_br> :
// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
// Why more complex instruction uses more simple pipeline?
// May be an error in doc.
def A57WriteALUsi : SchedWriteVariant<[
// lsl #2, lsl #1, or lsr #1.
SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>,
SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57WriteALUsr : SchedWriteVariant<[
SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
@ -200,7 +195,7 @@ def A57ReadALUsr : SchedReadVariant<[
SchedVar<IsPredicatedPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>
]>;
def : SchedAlias<WriteALUsi, A57WriteALUsi>;
def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
def : SchedAlias<WriteALUsr, A57WriteALUsr>;
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
def : SchedAlias<ReadALUsr, A57ReadALUsr>;

View File

@ -0,0 +1,34 @@
# RUN: llc -mcpu=cortex-a57 -mtriple=thumb -enable-misched -run-pass=machine-scheduler -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s
# CHECK-LABEL: ********** MI Scheduling **********
# CHECK: %[[RES:[0-9]+]]:rgpr = t2MLA
# CHECK-NEXT: # preds left
# CHECK-NEXT: # succs left
# CHECK-NEXT: # rdefs left
# CHECK-NEXT: Latency : 3
# CHECK-NEXT: Depth
# CHECK-NEXT: Height
# CHECK-NEXT: Predecessors:
# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
# CHECK-NEXT: SU({{.*}}): Out Latency=
# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
# CHECK-NEXT: Successors:
# CHECK-NEXT: SU([[SMLA_SU:[0-9]+]]): Data Latency=1 Reg=%[[RES]]
# CHECK-NEXT: Pressure Diff
# CHECK-NEXT: Single Issue : false;
# CHECK-NEXT: SU([[SMLA_SU]]): {{.*}} = t2SMLAL %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr(tied-def 0), %[[RES]]:rgpr(tied-def 1), 14, $noreg
name: test_smlal_forwarding
tracksRegLiveness: true
body: |
bb.0:
liveins: $r1, $r3, $r4, $r5, $r6
%1:rgpr = COPY $r1
%3:rgpr = COPY $r3
%4:rgpr = COPY $r4
%5:rgpr = COPY $r5
%6:rgpr = COPY $r6
%3:rgpr = t2MLA %4:rgpr, %1:rgpr, %4:rgpr, 14, $noreg
%6:rgpr, %5:rgpr = t2SMLAL %5:rgpr, %6:rgpr, %4:rgpr, %3:rgpr, 14, $noreg
$r0 = COPY %6:rgpr
BX_RET 14, $noreg, implicit $r0

View File

@ -283,7 +283,7 @@ static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
static void
processSTIPredicate(STIPredicateFunction &Fn,
const DenseMap<Record *, unsigned> &ProcModelMap) {
const ProcModelMapTy &ProcModelMap) {
DenseMap<const Record *, unsigned> Opcode2Index;
using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
std::vector<OpcodeMapPair> OpcodeMappings;
@ -1338,8 +1338,7 @@ public:
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
bool substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
bool IsRead, bool IsForAnyCPU,
unsigned StartIdx);
bool IsRead, unsigned StartIdx);
bool substituteVariants(const PredTransition &Trans);
@ -1413,29 +1412,6 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
return false;
}
static bool hasAliasedVariants(const CodeGenSchedRW &RW,
CodeGenSchedModels &SchedModels) {
if (RW.HasVariants)
return true;
for (Record *Alias : RW.Aliases) {
const CodeGenSchedRW &AliasRW =
SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW"));
if (AliasRW.HasVariants)
return true;
if (AliasRW.IsSequence) {
IdxVec ExpandedRWs;
SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead);
for (unsigned SI : ExpandedRWs) {
if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead),
SchedModels))
return true;
}
}
}
return false;
}
static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants,
ArrayRef<unsigned> ProcIndices) {
std::vector<Record *> Preds;
@ -1613,21 +1589,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
// starts. RWSeq must be applied to all transitions between StartIdx and the end
// of TransVec.
bool PredTransitions::substituteVariantOperand(
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
unsigned StartIdx) {
auto CollectAndAddVariants = [&](unsigned TransIdx,
const CodeGenSchedRW &SchedRW) {
// Distribute this partial PredTransition across intersecting variants.
// This will push a copies of TransVec[TransIdx] on the back of TransVec.
std::vector<TransVariant> IntersectingVariants;
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
// Now expand each variant on top of its copy of the transition.
for (const TransVariant &IV : IntersectingVariants)
pushVariant(IV, IsRead);
return !IntersectingVariants.empty();
};
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
bool Subst = false;
// Visit each original RW within the current sequence.
for (SmallVectorImpl<unsigned>::const_iterator
@ -1636,35 +1598,24 @@ bool PredTransitions::substituteVariantOperand(
// Push this RW on all partial PredTransitions or distribute variants.
// New PredTransitions may be pushed within this loop which should not be
// revisited (TransEnd must be loop invariant).
bool HasAliases = false, WasPushed = false;
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
TransIdx != TransEnd; ++TransIdx) {
// In the common case, push RW onto the current operand's sequence.
if (!hasAliasedVariants(SchedRW, SchedModels)) {
// Distribute this partial PredTransition across intersecting variants.
// This will push a copies of TransVec[TransIdx] on the back of TransVec.
std::vector<TransVariant> IntersectingVariants;
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
// Now expand each variant on top of its copy of the transition.
for (const TransVariant &IV : IntersectingVariants)
pushVariant(IV, IsRead);
if (IntersectingVariants.empty()) {
if (IsRead)
TransVec[TransIdx].ReadSequences.back().push_back(*RWI);
else
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
continue;
} else {
Subst = true;
}
HasAliases = true;
WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
Subst |= WasPushed;
}
if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
// If we're here this means that in some sched class:
// a) We have read variant for CPU A
// b) We have write variant for CPU B
// b) We don't have write variant for CPU A
// d) We must expand all read/write variants (IsForAnyCPU is true)
// e) We couldn't expand SchedRW because TransVec doesn't have
// any transition with compatible CPU ID.
// In such case we create new empty transition with zero (AnyCPU)
// index.
TransVec.reserve(TransVec.size() + 1);
TransVec.emplace_back(TransVec[StartIdx].PredTerm);
TransVec.back().ReadSequences.emplace_back();
Subst |= CollectAndAddVariants(TransVec.size() - 1, SchedRW);
}
}
return Subst;
@ -1683,7 +1634,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
bool Subst = false;
TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
assert(!llvm::count(Trans.ProcIndices, 0));
// Visit each original write sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
@ -1693,8 +1644,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->WriteSequences.emplace_back();
}
Subst |=
substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
Subst |= substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
}
// Visit each original read sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
@ -1705,8 +1655,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->ReadSequences.emplace_back();
}
Subst |=
substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
Subst |= substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
}
return Subst;
}
@ -1745,6 +1694,10 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
// requires creating a new SchedClass.
for (ArrayRef<PredTransition>::iterator
I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
// Variant expansion (substituteVariants) may create unconditional
// transitions. We don't need to build sched classes for them.
if (I->PredTerm.empty())
continue;
IdxVec OperWritesVariant, OperReadsVariant;
addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false);
addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true);
@ -1777,6 +1730,26 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
}
}
std::vector<unsigned> CodeGenSchedModels::getAllProcIndices() const {
std::vector<unsigned> ProcIdVec;
for (const auto &PM : ProcModelMap)
if (PM.second != 0)
ProcIdVec.push_back(PM.second);
return ProcIdVec;
}
static std::vector<PredTransition>
makePerProcessorTransitions(const PredTransition &Trans,
ArrayRef<unsigned> ProcIndices) {
std::vector<PredTransition> PerCpuTransVec;
for (unsigned ProcId : ProcIndices) {
assert(ProcId != 0);
PerCpuTransVec.push_back(Trans);
PerCpuTransVec.back().ProcIndices.assign(1, ProcId);
}
return PerCpuTransVec;
}
// Create new SchedClasses for the given ReadWrite list. If any of the
// ReadWrites refers to a SchedVariant, create a new SchedClass for each variant
// of the ReadWrite list, following Aliases if necessary.
@ -1812,6 +1785,10 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
}
LLVM_DEBUG(dbgs() << '\n');
LastTransitions = makePerProcessorTransitions(
LastTransitions[0], llvm::count(ProcIndices, 0)
? ArrayRef<unsigned>(getAllProcIndices())
: ProcIndices);
// Collect all PredTransitions for individual operands.
// Iterate until no variant writes remain.
bool SubstitutedAny;
@ -1823,9 +1800,6 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
LLVM_DEBUG(Transitions.dump());
LastTransitions.swap(Transitions.TransVec);
} while (SubstitutedAny);
// If the first transition has no variants, nothing to do.
if (LastTransitions[0].PredTerm.empty())
return;
// WARNING: We are about to mutate the SchedClasses vector. Do not refer to
// OperWrites, OperReads, or ProcIndices after calling inferFromTransitions.

View File

@ -21,6 +21,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
#include <map>
namespace llvm {
@ -409,6 +410,8 @@ public:
ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
};
using ProcModelMapTy = std::map<const Record*, unsigned>;
/// Top level container for machine model data.
class CodeGenSchedModels {
RecordKeeper &Records;
@ -421,7 +424,6 @@ class CodeGenSchedModels {
std::vector<CodeGenProcModel> ProcModels;
// Map Processor's MachineModel or ProcItin to a CodeGenProcModel index.
using ProcModelMapTy = DenseMap<Record*, unsigned>;
ProcModelMapTy ProcModelMap;
// Per-operand SchedReadWrite types.
@ -443,6 +445,7 @@ class CodeGenSchedModels {
InstClassMapTy InstrClassMap;
std::vector<STIPredicateFunction> STIPredicates;
std::vector<unsigned> getAllProcIndices() const;
public:
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);