forked from OSchip/llvm-project
Recommit [TableGen][SchedModels] Fix read/write variant substitution
Original commit rG112b3cb6ba49 introduced non-determinism in subtarget generator due to iteration over DenseMap. New patch fixes this changing ProcModelMapTy from DenseMap to std::map.
This commit is contained in:
parent
c55d9af8c0
commit
993eaf2d69
|
@ -183,11 +183,6 @@ class A57BranchForm<SchedWriteRes non_br> :
|
|||
// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
|
||||
// Why more complex instruction uses more simple pipeline?
|
||||
// May be an error in doc.
|
||||
def A57WriteALUsi : SchedWriteVariant<[
|
||||
// lsl #2, lsl #1, or lsr #1.
|
||||
SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>,
|
||||
SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
|
||||
]>;
|
||||
def A57WriteALUsr : SchedWriteVariant<[
|
||||
SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
|
||||
SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
|
||||
|
@ -200,7 +195,7 @@ def A57ReadALUsr : SchedReadVariant<[
|
|||
SchedVar<IsPredicatedPred, [ReadDefault]>,
|
||||
SchedVar<NoSchedPred, [ReadDefault]>
|
||||
]>;
|
||||
def : SchedAlias<WriteALUsi, A57WriteALUsi>;
|
||||
def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
|
||||
def : SchedAlias<WriteALUsr, A57WriteALUsr>;
|
||||
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
|
||||
def : SchedAlias<ReadALUsr, A57ReadALUsr>;
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
# RUN: llc -mcpu=cortex-a57 -mtriple=thumb -enable-misched -run-pass=machine-scheduler -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s
|
||||
|
||||
# CHECK-LABEL: ********** MI Scheduling **********
|
||||
# CHECK: %[[RES:[0-9]+]]:rgpr = t2MLA
|
||||
# CHECK-NEXT: # preds left
|
||||
# CHECK-NEXT: # succs left
|
||||
# CHECK-NEXT: # rdefs left
|
||||
# CHECK-NEXT: Latency : 3
|
||||
# CHECK-NEXT: Depth
|
||||
# CHECK-NEXT: Height
|
||||
# CHECK-NEXT: Predecessors:
|
||||
# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
|
||||
# CHECK-NEXT: SU({{.*}}): Out Latency=
|
||||
# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
|
||||
# CHECK-NEXT: Successors:
|
||||
# CHECK-NEXT: SU([[SMLA_SU:[0-9]+]]): Data Latency=1 Reg=%[[RES]]
|
||||
# CHECK-NEXT: Pressure Diff
|
||||
# CHECK-NEXT: Single Issue : false;
|
||||
# CHECK-NEXT: SU([[SMLA_SU]]): {{.*}} = t2SMLAL %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr(tied-def 0), %[[RES]]:rgpr(tied-def 1), 14, $noreg
|
||||
|
||||
name: test_smlal_forwarding
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $r1, $r3, $r4, $r5, $r6
|
||||
%1:rgpr = COPY $r1
|
||||
%3:rgpr = COPY $r3
|
||||
%4:rgpr = COPY $r4
|
||||
%5:rgpr = COPY $r5
|
||||
%6:rgpr = COPY $r6
|
||||
%3:rgpr = t2MLA %4:rgpr, %1:rgpr, %4:rgpr, 14, $noreg
|
||||
%6:rgpr, %5:rgpr = t2SMLAL %5:rgpr, %6:rgpr, %4:rgpr, %3:rgpr, 14, $noreg
|
||||
$r0 = COPY %6:rgpr
|
||||
BX_RET 14, $noreg, implicit $r0
|
|
@ -283,7 +283,7 @@ static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
|
|||
|
||||
static void
|
||||
processSTIPredicate(STIPredicateFunction &Fn,
|
||||
const DenseMap<Record *, unsigned> &ProcModelMap) {
|
||||
const ProcModelMapTy &ProcModelMap) {
|
||||
DenseMap<const Record *, unsigned> Opcode2Index;
|
||||
using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
|
||||
std::vector<OpcodeMapPair> OpcodeMappings;
|
||||
|
@ -1338,8 +1338,7 @@ public:
|
|||
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
|
||||
|
||||
bool substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
|
||||
bool IsRead, bool IsForAnyCPU,
|
||||
unsigned StartIdx);
|
||||
bool IsRead, unsigned StartIdx);
|
||||
|
||||
bool substituteVariants(const PredTransition &Trans);
|
||||
|
||||
|
@ -1413,29 +1412,6 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool hasAliasedVariants(const CodeGenSchedRW &RW,
|
||||
CodeGenSchedModels &SchedModels) {
|
||||
if (RW.HasVariants)
|
||||
return true;
|
||||
|
||||
for (Record *Alias : RW.Aliases) {
|
||||
const CodeGenSchedRW &AliasRW =
|
||||
SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW"));
|
||||
if (AliasRW.HasVariants)
|
||||
return true;
|
||||
if (AliasRW.IsSequence) {
|
||||
IdxVec ExpandedRWs;
|
||||
SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead);
|
||||
for (unsigned SI : ExpandedRWs) {
|
||||
if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead),
|
||||
SchedModels))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants,
|
||||
ArrayRef<unsigned> ProcIndices) {
|
||||
std::vector<Record *> Preds;
|
||||
|
@ -1613,21 +1589,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
|
|||
// starts. RWSeq must be applied to all transitions between StartIdx and the end
|
||||
// of TransVec.
|
||||
bool PredTransitions::substituteVariantOperand(
|
||||
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
|
||||
unsigned StartIdx) {
|
||||
|
||||
auto CollectAndAddVariants = [&](unsigned TransIdx,
|
||||
const CodeGenSchedRW &SchedRW) {
|
||||
// Distribute this partial PredTransition across intersecting variants.
|
||||
// This will push a copies of TransVec[TransIdx] on the back of TransVec.
|
||||
std::vector<TransVariant> IntersectingVariants;
|
||||
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
|
||||
// Now expand each variant on top of its copy of the transition.
|
||||
for (const TransVariant &IV : IntersectingVariants)
|
||||
pushVariant(IV, IsRead);
|
||||
return !IntersectingVariants.empty();
|
||||
};
|
||||
|
||||
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
|
||||
bool Subst = false;
|
||||
// Visit each original RW within the current sequence.
|
||||
for (SmallVectorImpl<unsigned>::const_iterator
|
||||
|
@ -1636,35 +1598,24 @@ bool PredTransitions::substituteVariantOperand(
|
|||
// Push this RW on all partial PredTransitions or distribute variants.
|
||||
// New PredTransitions may be pushed within this loop which should not be
|
||||
// revisited (TransEnd must be loop invariant).
|
||||
bool HasAliases = false, WasPushed = false;
|
||||
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
|
||||
TransIdx != TransEnd; ++TransIdx) {
|
||||
// In the common case, push RW onto the current operand's sequence.
|
||||
if (!hasAliasedVariants(SchedRW, SchedModels)) {
|
||||
// Distribute this partial PredTransition across intersecting variants.
|
||||
// This will push a copies of TransVec[TransIdx] on the back of TransVec.
|
||||
std::vector<TransVariant> IntersectingVariants;
|
||||
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
|
||||
// Now expand each variant on top of its copy of the transition.
|
||||
for (const TransVariant &IV : IntersectingVariants)
|
||||
pushVariant(IV, IsRead);
|
||||
if (IntersectingVariants.empty()) {
|
||||
if (IsRead)
|
||||
TransVec[TransIdx].ReadSequences.back().push_back(*RWI);
|
||||
else
|
||||
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
|
||||
continue;
|
||||
} else {
|
||||
Subst = true;
|
||||
}
|
||||
HasAliases = true;
|
||||
WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
|
||||
Subst |= WasPushed;
|
||||
}
|
||||
if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
|
||||
// If we're here this means that in some sched class:
|
||||
// a) We have read variant for CPU A
|
||||
// b) We have write variant for CPU B
|
||||
// b) We don't have write variant for CPU A
|
||||
// d) We must expand all read/write variants (IsForAnyCPU is true)
|
||||
// e) We couldn't expand SchedRW because TransVec doesn't have
|
||||
// any transition with compatible CPU ID.
|
||||
// In such case we create new empty transition with zero (AnyCPU)
|
||||
// index.
|
||||
TransVec.reserve(TransVec.size() + 1);
|
||||
TransVec.emplace_back(TransVec[StartIdx].PredTerm);
|
||||
TransVec.back().ReadSequences.emplace_back();
|
||||
Subst |= CollectAndAddVariants(TransVec.size() - 1, SchedRW);
|
||||
}
|
||||
}
|
||||
return Subst;
|
||||
|
@ -1683,7 +1634,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
|
|||
bool Subst = false;
|
||||
TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
|
||||
|
||||
bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
|
||||
assert(!llvm::count(Trans.ProcIndices, 0));
|
||||
// Visit each original write sequence.
|
||||
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
|
||||
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
|
||||
|
@ -1693,8 +1644,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
|
|||
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
|
||||
I->WriteSequences.emplace_back();
|
||||
}
|
||||
Subst |=
|
||||
substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
|
||||
Subst |= substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
|
||||
}
|
||||
// Visit each original read sequence.
|
||||
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
|
||||
|
@ -1705,8 +1655,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
|
|||
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
|
||||
I->ReadSequences.emplace_back();
|
||||
}
|
||||
Subst |=
|
||||
substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
|
||||
Subst |= substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
|
||||
}
|
||||
return Subst;
|
||||
}
|
||||
|
@ -1745,6 +1694,10 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
|
|||
// requires creating a new SchedClass.
|
||||
for (ArrayRef<PredTransition>::iterator
|
||||
I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
|
||||
// Variant expansion (substituteVariants) may create unconditional
|
||||
// transitions. We don't need to build sched classes for them.
|
||||
if (I->PredTerm.empty())
|
||||
continue;
|
||||
IdxVec OperWritesVariant, OperReadsVariant;
|
||||
addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false);
|
||||
addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true);
|
||||
|
@ -1777,6 +1730,26 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<unsigned> CodeGenSchedModels::getAllProcIndices() const {
|
||||
std::vector<unsigned> ProcIdVec;
|
||||
for (const auto &PM : ProcModelMap)
|
||||
if (PM.second != 0)
|
||||
ProcIdVec.push_back(PM.second);
|
||||
return ProcIdVec;
|
||||
}
|
||||
|
||||
static std::vector<PredTransition>
|
||||
makePerProcessorTransitions(const PredTransition &Trans,
|
||||
ArrayRef<unsigned> ProcIndices) {
|
||||
std::vector<PredTransition> PerCpuTransVec;
|
||||
for (unsigned ProcId : ProcIndices) {
|
||||
assert(ProcId != 0);
|
||||
PerCpuTransVec.push_back(Trans);
|
||||
PerCpuTransVec.back().ProcIndices.assign(1, ProcId);
|
||||
}
|
||||
return PerCpuTransVec;
|
||||
}
|
||||
|
||||
// Create new SchedClasses for the given ReadWrite list. If any of the
|
||||
// ReadWrites refers to a SchedVariant, create a new SchedClass for each variant
|
||||
// of the ReadWrite list, following Aliases if necessary.
|
||||
|
@ -1812,6 +1785,10 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
|
|||
}
|
||||
LLVM_DEBUG(dbgs() << '\n');
|
||||
|
||||
LastTransitions = makePerProcessorTransitions(
|
||||
LastTransitions[0], llvm::count(ProcIndices, 0)
|
||||
? ArrayRef<unsigned>(getAllProcIndices())
|
||||
: ProcIndices);
|
||||
// Collect all PredTransitions for individual operands.
|
||||
// Iterate until no variant writes remain.
|
||||
bool SubstitutedAny;
|
||||
|
@ -1823,9 +1800,6 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
|
|||
LLVM_DEBUG(Transitions.dump());
|
||||
LastTransitions.swap(Transitions.TransVec);
|
||||
} while (SubstitutedAny);
|
||||
// If the first transition has no variants, nothing to do.
|
||||
if (LastTransitions[0].PredTerm.empty())
|
||||
return;
|
||||
|
||||
// WARNING: We are about to mutate the SchedClasses vector. Do not refer to
|
||||
// OperWrites, OperReads, or ProcIndices after calling inferFromTransitions.
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/TableGen/Record.h"
|
||||
#include "llvm/TableGen/SetTheory.h"
|
||||
#include <map>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -409,6 +410,8 @@ public:
|
|||
ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
|
||||
};
|
||||
|
||||
using ProcModelMapTy = std::map<const Record*, unsigned>;
|
||||
|
||||
/// Top level container for machine model data.
|
||||
class CodeGenSchedModels {
|
||||
RecordKeeper &Records;
|
||||
|
@ -421,7 +424,6 @@ class CodeGenSchedModels {
|
|||
std::vector<CodeGenProcModel> ProcModels;
|
||||
|
||||
// Map Processor's MachineModel or ProcItin to a CodeGenProcModel index.
|
||||
using ProcModelMapTy = DenseMap<Record*, unsigned>;
|
||||
ProcModelMapTy ProcModelMap;
|
||||
|
||||
// Per-operand SchedReadWrite types.
|
||||
|
@ -443,6 +445,7 @@ class CodeGenSchedModels {
|
|||
InstClassMapTy InstrClassMap;
|
||||
|
||||
std::vector<STIPredicateFunction> STIPredicates;
|
||||
std::vector<unsigned> getAllProcIndices() const;
|
||||
|
||||
public:
|
||||
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);
|
||||
|
|
Loading…
Reference in New Issue