forked from OSchip/llvm-project
[Hexagon] Improve the selection algorithm in scalarizeShuffle
Use topological ordering for newly generated nodes. llvm-svn: 342090
This commit is contained in:
parent
3a55d1ef27
commit
f853741142
|
@ -1327,6 +1327,32 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||||
return vmuxp(Bytes, L, R, Results);
|
return vmuxp(Bytes, L, R, Results);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
|
||||||
|
template <typename T>
|
||||||
|
Deleter(SelectionDAG &D, T &C)
|
||||||
|
: SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
|
||||||
|
C.erase(N);
|
||||||
|
}) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct NullifyingVector : public T {
|
||||||
|
DenseMap<SDNode*, SDNode**> Refs;
|
||||||
|
NullifyingVector(T &&V) : T(V) {
|
||||||
|
for (unsigned i = 0, e = T::size(); i != e; ++i) {
|
||||||
|
SDNode *&N = T::operator[](i);
|
||||||
|
Refs[N] = &N;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void erase(SDNode *N) {
|
||||||
|
auto F = Refs.find(N);
|
||||||
|
if (F != Refs.end())
|
||||||
|
*F->second = nullptr;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
|
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
|
||||||
MVT ResTy, SDValue Va, SDValue Vb,
|
MVT ResTy, SDValue Va, SDValue Vb,
|
||||||
SDNode *N) {
|
SDNode *N) {
|
||||||
|
@ -1337,6 +1363,24 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
|
||||||
bool HavePairs = (2*HwLen == VecLen);
|
bool HavePairs = (2*HwLen == VecLen);
|
||||||
MVT SingleTy = getSingleVT(MVT::i8);
|
MVT SingleTy = getSingleVT(MVT::i8);
|
||||||
|
|
||||||
|
// The prior attempts to handle this shuffle may have left a bunch of
|
||||||
|
// dead nodes in the DAG (such as constants). These nodes will be added
|
||||||
|
// at the end of DAG's node list, which at that point had already been
|
||||||
|
// sorted topologically. In the main selection loop, the node list is
|
||||||
|
// traversed backwards from the root node, which means that any new
|
||||||
|
// nodes (from the end of the list) will not be visited.
|
||||||
|
// Scalarization will replace the shuffle node with the scalarized
|
||||||
|
// expression, and if that expression reused any if the leftoever (dead)
|
||||||
|
// nodes, these nodes would not be selected (since the "local" selection
|
||||||
|
// only visits nodes that are not in AllNodes).
|
||||||
|
// To avoid this issue, remove all dead nodes from the DAG now.
|
||||||
|
DAG.RemoveDeadNodes();
|
||||||
|
DenseSet<SDNode*> AllNodes;
|
||||||
|
for (SDNode &S : DAG.allnodes())
|
||||||
|
AllNodes.insert(&S);
|
||||||
|
|
||||||
|
Deleter DUA(DAG, AllNodes);
|
||||||
|
|
||||||
SmallVector<SDValue,128> Ops;
|
SmallVector<SDValue,128> Ops;
|
||||||
LLVMContext &Ctx = *DAG.getContext();
|
LLVMContext &Ctx = *DAG.getContext();
|
||||||
MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
|
MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
|
||||||
|
@ -1386,32 +1430,55 @@ bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
|
||||||
|
|
||||||
assert(!N->use_empty());
|
assert(!N->use_empty());
|
||||||
ISel.ReplaceNode(N, LV.getNode());
|
ISel.ReplaceNode(N, LV.getNode());
|
||||||
DAG.RemoveDeadNodes();
|
|
||||||
|
|
||||||
std::deque<SDNode*> SubNodes;
|
if (AllNodes.count(LV.getNode())) {
|
||||||
SubNodes.push_back(LV.getNode());
|
DAG.RemoveDeadNodes();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The lowered build-vector node will now need to be selected. It needs
|
||||||
|
// to be done here because this node and its submodes are not included
|
||||||
|
// in the main selection loop.
|
||||||
|
// Implement essentially the same topological ordering algorithm as is
|
||||||
|
// used in SelectionDAGISel.
|
||||||
|
|
||||||
|
SetVector<SDNode*> SubNodes, TmpQ;
|
||||||
|
std::map<SDNode*,unsigned> NumOps;
|
||||||
|
|
||||||
|
SubNodes.insert(LV.getNode());
|
||||||
for (unsigned I = 0; I != SubNodes.size(); ++I) {
|
for (unsigned I = 0; I != SubNodes.size(); ++I) {
|
||||||
for (SDValue Op : SubNodes[I]->ops())
|
unsigned OpN = 0;
|
||||||
SubNodes.push_back(Op.getNode());
|
SDNode *S = SubNodes[I];
|
||||||
|
for (SDValue Op : S->ops()) {
|
||||||
|
if (AllNodes.count(Op.getNode()))
|
||||||
|
continue;
|
||||||
|
SubNodes.insert(Op.getNode());
|
||||||
|
++OpN;
|
||||||
|
}
|
||||||
|
NumOps.insert({S, OpN});
|
||||||
|
if (OpN == 0)
|
||||||
|
TmpQ.insert(S);
|
||||||
}
|
}
|
||||||
while (!SubNodes.empty()) {
|
|
||||||
SDNode *S = SubNodes.front();
|
for (unsigned I = 0; I != TmpQ.size(); ++I) {
|
||||||
SubNodes.pop_front();
|
SDNode *S = TmpQ[I];
|
||||||
if (S->use_empty())
|
for (SDNode *U : S->uses()) {
|
||||||
continue;
|
if (!SubNodes.count(U))
|
||||||
// This isn't great, but users need to be selected before any nodes that
|
continue;
|
||||||
// they use. (The reason is to match larger patterns, and avoid nodes that
|
auto F = NumOps.find(U);
|
||||||
// cannot be matched on their own, e.g. ValueType, TokenFactor, etc.).
|
assert(F != NumOps.end());
|
||||||
bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) {
|
assert(F->second > 0);
|
||||||
return llvm::any_of(SubNodes, [U](const SDNode *T) {
|
if (!--F->second)
|
||||||
return T == U;
|
TmpQ.insert(F->first);
|
||||||
});
|
}
|
||||||
});
|
}
|
||||||
if (PendingUser)
|
assert(SubNodes.size() == TmpQ.size());
|
||||||
SubNodes.push_back(S);
|
NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
|
||||||
else
|
|
||||||
|
Deleter DUQ(DAG, Queue);
|
||||||
|
for (SDNode *S : reverse(Queue))
|
||||||
|
if (S != nullptr)
|
||||||
ISel.Select(S);
|
ISel.Select(S);
|
||||||
}
|
|
||||||
|
|
||||||
DAG.RemoveDeadNodes();
|
DAG.RemoveDeadNodes();
|
||||||
return true;
|
return true;
|
||||||
|
|
Loading…
Reference in New Issue