[Hexagon] Improve the selection algorithm in scalarizeShuffle

Use topological ordering for newly generated nodes.

llvm-svn: 342090
This commit is contained in:
Krzysztof Parzyszek 2018-09-12 22:10:58 +00:00
parent 3a55d1ef27
commit f853741142
1 changed files with 89 additions and 22 deletions

View File

@ -1327,6 +1327,32 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
return vmuxp(Bytes, L, R, Results); return vmuxp(Bytes, L, R, Results);
} }
namespace {
// Listener that forwards node-deletion notifications from a SelectionDAG to
// a caller-owned container. For every notification it calls
// Tracked.erase(Dead), so the container type only needs an erase(SDNode*)
// member. The container is captured by reference and must therefore outlive
// this listener.
struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
  template <typename ContainerT>
  Deleter(SelectionDAG &Graph, ContainerT &Tracked)
    : SelectionDAG::DAGNodeDeletedListener(
          Graph,
          // The second callback argument is not needed here and is ignored.
          [&Tracked] (SDNode *Dead, SDNode *) {
            Tracked.erase(Dead);
          }) {}
};
// A sequence container of SDNode* (derived from T) in which "erasing" a node
// does not remove its slot but overwrites it with nullptr. This keeps the
// positions of the remaining elements unchanged, so the container can be
// iterated while nodes are being reported as deleted; iteration just has to
// skip null entries.
//
// Refs maps each node present at construction time to the address of its
// slot inside this container. Those addresses point into this object's own
// storage: anything that relocates the elements (growing the container,
// or copying this object) leaves Refs referring to stale/foreign slots, so
// the container is meant to be built once and then only read and nullified.
template <typename T>
struct NullifyingVector : public T {
  DenseMap<SDNode*, SDNode**> Refs;

  // Takes ownership of the elements of V and indexes every slot.
  // V is a named rvalue reference, i.e. an lvalue inside this constructor;
  // it has to be cast back to an rvalue, otherwise T's copy constructor
  // would be selected and the whole container duplicated needlessly.
  NullifyingVector(T &&V) : T(static_cast<T &&>(V)) {
    for (unsigned i = 0, e = T::size(); i != e; ++i) {
      SDNode *&N = T::operator[](i);
      Refs[N] = &N;
    }
  }

  // Replace the slot that held N at construction time with nullptr.
  // Nodes that were never in the container are ignored. The Refs entry
  // itself is kept, so repeated calls for the same node are harmless.
  void erase(SDNode *N) {
    auto F = Refs.find(N);
    if (F != Refs.end())
      *F->second = nullptr;
  }
};
}
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
MVT ResTy, SDValue Va, SDValue Vb, MVT ResTy, SDValue Va, SDValue Vb,
SDNode *N) { SDNode *N) {
@ -1337,6 +1363,24 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
bool HavePairs = (2*HwLen == VecLen); bool HavePairs = (2*HwLen == VecLen);
MVT SingleTy = getSingleVT(MVT::i8); MVT SingleTy = getSingleVT(MVT::i8);
// The prior attempts to handle this shuffle may have left a bunch of
// dead nodes in the DAG (such as constants). These nodes will be added
// at the end of DAG's node list, which at that point had already been
// sorted topologically. In the main selection loop, the node list is
// traversed backwards from the root node, which means that any new
// nodes (from the end of the list) will not be visited.
// Scalarization will replace the shuffle node with the scalarized
// expression, and if that expression reused any of the leftover (dead)
// nodes, these nodes would not be selected (since the "local" selection
// only visits nodes that are not in AllNodes).
// To avoid this issue, remove all dead nodes from the DAG now.
DAG.RemoveDeadNodes();
DenseSet<SDNode*> AllNodes;
for (SDNode &S : DAG.allnodes())
AllNodes.insert(&S);
Deleter DUA(DAG, AllNodes);
SmallVector<SDValue,128> Ops; SmallVector<SDValue,128> Ops;
LLVMContext &Ctx = *DAG.getContext(); LLVMContext &Ctx = *DAG.getContext();
MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT(); MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
@ -1386,32 +1430,55 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
assert(!N->use_empty()); assert(!N->use_empty());
ISel.ReplaceNode(N, LV.getNode()); ISel.ReplaceNode(N, LV.getNode());
DAG.RemoveDeadNodes();
std::deque<SDNode*> SubNodes; if (AllNodes.count(LV.getNode())) {
SubNodes.push_back(LV.getNode()); DAG.RemoveDeadNodes();
return true;
}
// The lowered build-vector node will now need to be selected. It needs
// to be done here because this node and its subnodes are not included
// in the main selection loop.
// Implement essentially the same topological ordering algorithm as is
// used in SelectionDAGISel.
SetVector<SDNode*> SubNodes, TmpQ;
std::map<SDNode*,unsigned> NumOps;
SubNodes.insert(LV.getNode());
for (unsigned I = 0; I != SubNodes.size(); ++I) { for (unsigned I = 0; I != SubNodes.size(); ++I) {
for (SDValue Op : SubNodes[I]->ops()) unsigned OpN = 0;
SubNodes.push_back(Op.getNode()); SDNode *S = SubNodes[I];
for (SDValue Op : S->ops()) {
if (AllNodes.count(Op.getNode()))
continue;
SubNodes.insert(Op.getNode());
++OpN;
}
NumOps.insert({S, OpN});
if (OpN == 0)
TmpQ.insert(S);
} }
while (!SubNodes.empty()) {
SDNode *S = SubNodes.front(); for (unsigned I = 0; I != TmpQ.size(); ++I) {
SubNodes.pop_front(); SDNode *S = TmpQ[I];
if (S->use_empty()) for (SDNode *U : S->uses()) {
continue; if (!SubNodes.count(U))
// This isn't great, but users need to be selected before any nodes that continue;
// they use. (The reason is to match larger patterns, and avoid nodes that auto F = NumOps.find(U);
// cannot be matched on their own, e.g. ValueType, TokenFactor, etc.). assert(F != NumOps.end());
bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) { assert(F->second > 0);
return llvm::any_of(SubNodes, [U](const SDNode *T) { if (!--F->second)
return T == U; TmpQ.insert(F->first);
}); }
}); }
if (PendingUser) assert(SubNodes.size() == TmpQ.size());
SubNodes.push_back(S); NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
else
Deleter DUQ(DAG, Queue);
for (SDNode *S : reverse(Queue))
if (S != nullptr)
ISel.Select(S); ISel.Select(S);
}
DAG.RemoveDeadNodes(); DAG.RemoveDeadNodes();
return true; return true;