forked from OSchip/llvm-project
[TableGen][SchedModels] Fix read/write variant substitution
This patch fixes the case where a sched class has write and read variants belonging to different processor models. Differential revision: https://reviews.llvm.org/D89777
This commit is contained in:
parent
ff2e24a741
commit
cc96a82291
|
@ -270,7 +270,11 @@ def : ReadAdvance<ReadMUL, 0>;
|
|||
// from similar μops, allowing a typical sequence of multiply-accumulate μops
|
||||
// to issue one every 1 cycle (sched advance = 2).
|
||||
def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
|
||||
def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
|
||||
def A57WriteMLAL : SchedWriteVariant<[
|
||||
SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
|
||||
SchedVar<NoSchedPred, [A57Write_4cyc_1M]>
|
||||
]>;
|
||||
|
||||
def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
|
||||
|
||||
def : InstRW<[A57WriteMLA],
|
||||
|
|
|
@ -1421,9 +1421,9 @@
|
|||
# CHECK-NEXT: 1 3 1.00 smladeq r2, r3, r5, r8
|
||||
# CHECK-NEXT: 1 3 1.00 smladxhi r2, r3, r5, r8
|
||||
# CHECK-NEXT: 2 4 2.00 smlal r2, r3, r5, r8
|
||||
# CHECK-NEXT: 2 4 2.00 smlals r2, r3, r5, r8
|
||||
# CHECK-NEXT: 4 5 2.00 smlals r2, r3, r5, r8
|
||||
# CHECK-NEXT: 2 4 2.00 smlaleq r2, r3, r5, r8
|
||||
# CHECK-NEXT: 2 4 2.00 smlalshi r2, r3, r5, r8
|
||||
# CHECK-NEXT: 4 5 2.00 smlalshi r2, r3, r5, r8
|
||||
# CHECK-NEXT: 2 4 2.00 smlalbb r3, r1, r9, r0
|
||||
# CHECK-NEXT: 2 4 2.00 smlalbt r5, r6, r4, r1
|
||||
# CHECK-NEXT: 2 4 2.00 smlaltb r4, r2, r3, r2
|
||||
|
@ -1634,12 +1634,12 @@
|
|||
# CHECK-NEXT: 2 4 2.00 umaallt r3, r4, r5, r6
|
||||
# CHECK-NEXT: 2 4 2.00 umlal r2, r4, r6, r8
|
||||
# CHECK-NEXT: 2 4 2.00 umlalgt r6, r1, r2, r6
|
||||
# CHECK-NEXT: 2 4 2.00 umlals r2, r9, r2, r3
|
||||
# CHECK-NEXT: 2 4 2.00 umlalseq r3, r5, r1, r2
|
||||
# CHECK-NEXT: 4 5 2.00 umlals r2, r9, r2, r3
|
||||
# CHECK-NEXT: 4 5 2.00 umlalseq r3, r5, r1, r2
|
||||
# CHECK-NEXT: 2 4 2.00 umull r2, r4, r6, r8
|
||||
# CHECK-NEXT: 2 4 2.00 umullgt r6, r1, r2, r6
|
||||
# CHECK-NEXT: 2 4 2.00 umulls r2, r9, r2, r3
|
||||
# CHECK-NEXT: 2 4 2.00 umullseq r3, r5, r1, r2
|
||||
# CHECK-NEXT: 4 5 2.00 umulls r2, r9, r2, r3
|
||||
# CHECK-NEXT: 4 5 2.00 umullseq r3, r5, r1, r2
|
||||
# CHECK-NEXT: 1 2 1.00 uqadd16 r1, r2, r3
|
||||
# CHECK-NEXT: 1 2 1.00 uqadd16gt r4, r7, r9
|
||||
# CHECK-NEXT: 1 2 1.00 uqadd8 r3, r4, r8
|
||||
|
@ -1719,7 +1719,7 @@
|
|||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6]
|
||||
# CHECK-NEXT: 8.00 133.00 133.00 53.00 522.00 12.00 - -
|
||||
# CHECK-NEXT: 8.00 139.00 139.00 53.00 522.00 12.00 - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions:
|
||||
|
@ -2285,9 +2285,9 @@
|
|||
# CHECK-NEXT: - - - - 1.00 - - - smladeq r2, r3, r5, r8
|
||||
# CHECK-NEXT: - - - - 1.00 - - - smladxhi r2, r3, r5, r8
|
||||
# CHECK-NEXT: - - - - 2.00 - - - smlal r2, r3, r5, r8
|
||||
# CHECK-NEXT: - - - - 2.00 - - - smlals r2, r3, r5, r8
|
||||
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - smlals r2, r3, r5, r8
|
||||
# CHECK-NEXT: - - - - 2.00 - - - smlaleq r2, r3, r5, r8
|
||||
# CHECK-NEXT: - - - - 2.00 - - - smlalshi r2, r3, r5, r8
|
||||
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - smlalshi r2, r3, r5, r8
|
||||
# CHECK-NEXT: - - - - 2.00 - - - smlalbb r3, r1, r9, r0
|
||||
# CHECK-NEXT: - - - - 2.00 - - - smlalbt r5, r6, r4, r1
|
||||
# CHECK-NEXT: - - - - 2.00 - - - smlaltb r4, r2, r3, r2
|
||||
|
@ -2498,12 +2498,12 @@
|
|||
# CHECK-NEXT: - - - - 2.00 - - - umaallt r3, r4, r5, r6
|
||||
# CHECK-NEXT: - - - - 2.00 - - - umlal r2, r4, r6, r8
|
||||
# CHECK-NEXT: - - - - 2.00 - - - umlalgt r6, r1, r2, r6
|
||||
# CHECK-NEXT: - - - - 2.00 - - - umlals r2, r9, r2, r3
|
||||
# CHECK-NEXT: - - - - 2.00 - - - umlalseq r3, r5, r1, r2
|
||||
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umlals r2, r9, r2, r3
|
||||
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umlalseq r3, r5, r1, r2
|
||||
# CHECK-NEXT: - - - - 2.00 - - - umull r2, r4, r6, r8
|
||||
# CHECK-NEXT: - - - - 2.00 - - - umullgt r6, r1, r2, r6
|
||||
# CHECK-NEXT: - - - - 2.00 - - - umulls r2, r9, r2, r3
|
||||
# CHECK-NEXT: - - - - 2.00 - - - umullseq r3, r5, r1, r2
|
||||
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umulls r2, r9, r2, r3
|
||||
# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umullseq r3, r5, r1, r2
|
||||
# CHECK-NEXT: - - - - 1.00 - - - uqadd16 r1, r2, r3
|
||||
# CHECK-NEXT: - - - - 1.00 - - - uqadd16gt r4, r7, r9
|
||||
# CHECK-NEXT: - - - - 1.00 - - - uqadd8 r3, r4, r8
|
||||
|
|
|
@ -1315,6 +1315,16 @@ struct PredTransition {
|
|||
SmallVector<SmallVector<unsigned,4>, 16> WriteSequences;
|
||||
SmallVector<SmallVector<unsigned,4>, 16> ReadSequences;
|
||||
SmallVector<unsigned, 4> ProcIndices;
|
||||
|
||||
PredTransition() = default;
|
||||
PredTransition(ArrayRef<PredCheck> PT) {
|
||||
PredTerm.assign(PT.begin(), PT.end());
|
||||
ProcIndices.assign(1, 0);
|
||||
}
|
||||
PredTransition(ArrayRef<PredCheck> PT, ArrayRef<unsigned> PIds) {
|
||||
PredTerm.assign(PT.begin(), PT.end());
|
||||
ProcIndices.assign(PIds.begin(), PIds.end());
|
||||
}
|
||||
};
|
||||
|
||||
// Encapsulate a set of partially constructed transitions.
|
||||
|
@ -1328,7 +1338,8 @@ public:
|
|||
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
|
||||
|
||||
void substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
|
||||
bool IsRead, unsigned StartIdx);
|
||||
bool IsRead, bool IsForAnyCPU,
|
||||
unsigned StartIdx);
|
||||
|
||||
void substituteVariants(const PredTransition &Trans);
|
||||
|
||||
|
@ -1568,7 +1579,20 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
|
|||
// starts. RWSeq must be applied to all transitions between StartIdx and the end
|
||||
// of TransVec.
|
||||
void PredTransitions::substituteVariantOperand(
|
||||
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
|
||||
const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
|
||||
unsigned StartIdx) {
|
||||
|
||||
auto CollectAndAddVariants = [&](unsigned TransIdx,
|
||||
const CodeGenSchedRW &SchedRW) {
|
||||
// Distribute this partial PredTransition across intersecting variants.
|
||||
// This will push copies of TransVec[TransIdx] on the back of TransVec.
|
||||
std::vector<TransVariant> IntersectingVariants;
|
||||
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
|
||||
// Now expand each variant on top of its copy of the transition.
|
||||
for (const TransVariant &IV : IntersectingVariants)
|
||||
pushVariant(IV, IsRead);
|
||||
return !IntersectingVariants.empty();
|
||||
};
|
||||
|
||||
// Visit each original RW within the current sequence.
|
||||
for (SmallVectorImpl<unsigned>::const_iterator
|
||||
|
@ -1577,6 +1601,7 @@ void PredTransitions::substituteVariantOperand(
|
|||
// Push this RW on all partial PredTransitions or distribute variants.
|
||||
// New PredTransitions may be pushed within this loop which should not be
|
||||
// revisited (TransEnd must be loop invariant).
|
||||
bool HasAliases = false, WasPushed = false;
|
||||
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
|
||||
TransIdx != TransEnd; ++TransIdx) {
|
||||
// In the common case, push RW onto the current operand's sequence.
|
||||
|
@ -1587,17 +1612,22 @@ void PredTransitions::substituteVariantOperand(
|
|||
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
|
||||
continue;
|
||||
}
|
||||
// Distribute this partial PredTransition across intersecting variants.
|
||||
// This will push copies of TransVec[TransIdx] on the back of TransVec.
|
||||
std::vector<TransVariant> IntersectingVariants;
|
||||
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
|
||||
// Now expand each variant on top of its copy of the transition.
|
||||
for (std::vector<TransVariant>::const_iterator
|
||||
IVI = IntersectingVariants.begin(),
|
||||
IVE = IntersectingVariants.end();
|
||||
IVI != IVE; ++IVI) {
|
||||
pushVariant(*IVI, IsRead);
|
||||
}
|
||||
HasAliases = true;
|
||||
WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
|
||||
}
|
||||
if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
|
||||
// If we're here this means that in some sched class:
|
||||
// a) We have read variant for CPU A
|
||||
// b) We have write variant for CPU B
|
||||
// c) We don't have write variant for CPU A
|
||||
// d) We must expand all read/write variants (IsForAnyCPU is true)
|
||||
// e) We couldn't expand SchedRW because TransVec doesn't have
|
||||
// any transition with compatible CPU ID.
|
||||
// In such a case we create a new empty transition with zero (AnyCPU)
|
||||
// index.
|
||||
TransVec.emplace_back(TransVec[StartIdx].PredTerm);
|
||||
TransVec.back().ReadSequences.emplace_back();
|
||||
CollectAndAddVariants(TransVec.size() - 1, SchedRW);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1612,10 +1642,9 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
|
|||
// Build up a set of partial results starting at the back of
|
||||
// PredTransitions. Remember the first new transition.
|
||||
unsigned StartIdx = TransVec.size();
|
||||
TransVec.emplace_back();
|
||||
TransVec.back().PredTerm = Trans.PredTerm;
|
||||
TransVec.back().ProcIndices = Trans.ProcIndices;
|
||||
TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
|
||||
|
||||
bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
|
||||
// Visit each original write sequence.
|
||||
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
|
||||
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
|
||||
|
@ -1625,7 +1654,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
|
|||
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
|
||||
I->WriteSequences.emplace_back();
|
||||
}
|
||||
substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
|
||||
substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
|
||||
}
|
||||
// Visit each original read sequence.
|
||||
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
|
||||
|
@ -1636,7 +1665,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
|
|||
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
|
||||
I->ReadSequences.emplace_back();
|
||||
}
|
||||
substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
|
||||
substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue