forked from OSchip/llvm-project
[Hexagon] Improve the selection algorithm in scalarizeShuffle
Use topological ordering for newly generated nodes. llvm-svn: 342090
This commit is contained in:
parent
3a55d1ef27
commit
f853741142
|
@ -1327,6 +1327,32 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
|
|||
return vmuxp(Bytes, L, R, Results);
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
|
||||
template <typename T>
|
||||
Deleter(SelectionDAG &D, T &C)
|
||||
: SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
|
||||
C.erase(N);
|
||||
}) {}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct NullifyingVector : public T {
|
||||
DenseMap<SDNode*, SDNode**> Refs;
|
||||
NullifyingVector(T &&V) : T(V) {
|
||||
for (unsigned i = 0, e = T::size(); i != e; ++i) {
|
||||
SDNode *&N = T::operator[](i);
|
||||
Refs[N] = &N;
|
||||
}
|
||||
}
|
||||
void erase(SDNode *N) {
|
||||
auto F = Refs.find(N);
|
||||
if (F != Refs.end())
|
||||
*F->second = nullptr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
|
||||
MVT ResTy, SDValue Va, SDValue Vb,
|
||||
SDNode *N) {
|
||||
|
@ -1337,6 +1363,24 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
|
|||
bool HavePairs = (2*HwLen == VecLen);
|
||||
MVT SingleTy = getSingleVT(MVT::i8);
|
||||
|
||||
// The prior attempts to handle this shuffle may have left a bunch of
|
||||
// dead nodes in the DAG (such as constants). These nodes will be added
|
||||
// at the end of DAG's node list, which at that point had already been
|
||||
// sorted topologically. In the main selection loop, the node list is
|
||||
// traversed backwards from the root node, which means that any new
|
||||
// nodes (from the end of the list) will not be visited.
|
||||
// Scalarization will replace the shuffle node with the scalarized
|
||||
// expression, and if that expression reused any if the leftoever (dead)
|
||||
// nodes, these nodes would not be selected (since the "local" selection
|
||||
// only visits nodes that are not in AllNodes).
|
||||
// To avoid this issue, remove all dead nodes from the DAG now.
|
||||
DAG.RemoveDeadNodes();
|
||||
DenseSet<SDNode*> AllNodes;
|
||||
for (SDNode &S : DAG.allnodes())
|
||||
AllNodes.insert(&S);
|
||||
|
||||
Deleter DUA(DAG, AllNodes);
|
||||
|
||||
SmallVector<SDValue,128> Ops;
|
||||
LLVMContext &Ctx = *DAG.getContext();
|
||||
MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
|
||||
|
@ -1386,32 +1430,55 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
|
|||
|
||||
assert(!N->use_empty());
|
||||
ISel.ReplaceNode(N, LV.getNode());
|
||||
DAG.RemoveDeadNodes();
|
||||
|
||||
std::deque<SDNode*> SubNodes;
|
||||
SubNodes.push_back(LV.getNode());
|
||||
if (AllNodes.count(LV.getNode())) {
|
||||
DAG.RemoveDeadNodes();
|
||||
return true;
|
||||
}
|
||||
|
||||
// The lowered build-vector node will now need to be selected. It needs
|
||||
// to be done here because this node and its submodes are not included
|
||||
// in the main selection loop.
|
||||
// Implement essentially the same topological ordering algorithm as is
|
||||
// used in SelectionDAGISel.
|
||||
|
||||
SetVector<SDNode*> SubNodes, TmpQ;
|
||||
std::map<SDNode*,unsigned> NumOps;
|
||||
|
||||
SubNodes.insert(LV.getNode());
|
||||
for (unsigned I = 0; I != SubNodes.size(); ++I) {
|
||||
for (SDValue Op : SubNodes[I]->ops())
|
||||
SubNodes.push_back(Op.getNode());
|
||||
unsigned OpN = 0;
|
||||
SDNode *S = SubNodes[I];
|
||||
for (SDValue Op : S->ops()) {
|
||||
if (AllNodes.count(Op.getNode()))
|
||||
continue;
|
||||
SubNodes.insert(Op.getNode());
|
||||
++OpN;
|
||||
}
|
||||
NumOps.insert({S, OpN});
|
||||
if (OpN == 0)
|
||||
TmpQ.insert(S);
|
||||
}
|
||||
while (!SubNodes.empty()) {
|
||||
SDNode *S = SubNodes.front();
|
||||
SubNodes.pop_front();
|
||||
if (S->use_empty())
|
||||
continue;
|
||||
// This isn't great, but users need to be selected before any nodes that
|
||||
// they use. (The reason is to match larger patterns, and avoid nodes that
|
||||
// cannot be matched on their own, e.g. ValueType, TokenFactor, etc.).
|
||||
bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) {
|
||||
return llvm::any_of(SubNodes, [U](const SDNode *T) {
|
||||
return T == U;
|
||||
});
|
||||
});
|
||||
if (PendingUser)
|
||||
SubNodes.push_back(S);
|
||||
else
|
||||
|
||||
for (unsigned I = 0; I != TmpQ.size(); ++I) {
|
||||
SDNode *S = TmpQ[I];
|
||||
for (SDNode *U : S->uses()) {
|
||||
if (!SubNodes.count(U))
|
||||
continue;
|
||||
auto F = NumOps.find(U);
|
||||
assert(F != NumOps.end());
|
||||
assert(F->second > 0);
|
||||
if (!--F->second)
|
||||
TmpQ.insert(F->first);
|
||||
}
|
||||
}
|
||||
assert(SubNodes.size() == TmpQ.size());
|
||||
NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
|
||||
|
||||
Deleter DUQ(DAG, Queue);
|
||||
for (SDNode *S : reverse(Queue))
|
||||
if (S != nullptr)
|
||||
ISel.Select(S);
|
||||
}
|
||||
|
||||
DAG.RemoveDeadNodes();
|
||||
return true;
|
||||
|
|
Loading…
Reference in New Issue