[AST] Use data-recursion when building ParentMap, avoid stack overflow.

The following crashes on my system before this patch, but not after:

  void foo(int i) {
    switch (i) {
      case 1:
      case 2:
      ... 100000 cases ...
        ;
    }
  }

  clang-query -c="match stmt(hasAncestor(stmt()))" deep.c

I'm not sure it's actually a sane testcase to run though, it's pretty slow :-)

Differential Revision: https://reviews.llvm.org/D88222
This commit is contained in:
Sam McCall 2020-09-24 14:26:11 +02:00
parent 4b8cb665a1
commit 1ad94624f8
1 changed files with 59 additions and 49 deletions

View File

@ -211,8 +211,6 @@ DynTypedNode createDynTypedNode(const NestedNameSpecifierLoc &Node) {
/// Note that the relationship described here is purely in terms of AST
/// traversal - there are other relationships (for example declaration context)
/// in the AST that are better modeled by special matchers.
///
/// FIXME: Currently only builds up the map using \c Stmt and \c Decl nodes.
class ParentMapContext::ParentMap::ASTVisitor
: public RecursiveASTVisitor<ASTVisitor> {
public:
@ -227,51 +225,60 @@ private:
bool shouldVisitImplicitCode() const { return true; }
/// Record the parent of the node we're visiting.
/// MapNode is the child, the parent is on top of ParentStack.
/// Parents is the parent storage (either PointerParents or OtherParents).
template <typename MapNodeTy, typename MapTy>
void addParent(MapNodeTy MapNode, MapTy *Parents) {
if (ParentStack.empty())
return;
// FIXME: Currently we add the same parent multiple times, but only
// when no memoization data is available for the type.
// For example when we visit all subexpressions of template
// instantiations; this is suboptimal, but benign: the only way to
// visit those is with hasAncestor / hasParent, and those do not create
// new matches.
// The plan is to enable DynTypedNode to be storable in a map or hash
// map. The main problem there is to implement hash functions /
// comparison operators for all types that DynTypedNode supports that
// do not have pointer identity.
auto &NodeOrVector = (*Parents)[MapNode];
if (NodeOrVector.isNull()) {
if (const auto *D = ParentStack.back().get<Decl>())
NodeOrVector = D;
else if (const auto *S = ParentStack.back().get<Stmt>())
NodeOrVector = S;
else
NodeOrVector = new DynTypedNode(ParentStack.back());
} else {
if (!NodeOrVector.template is<ParentVector *>()) {
auto *Vector = new ParentVector(
1, getSingleDynTypedNodeFromParentMap(NodeOrVector));
delete NodeOrVector.template dyn_cast<DynTypedNode *>();
NodeOrVector = Vector;
}
auto *Vector = NodeOrVector.template get<ParentVector *>();
// Skip duplicates for types that have memoization data.
// We must check that the type has memoization data before calling
// std::find() because DynTypedNode::operator== can't compare all
// types.
bool Found = ParentStack.back().getMemoizationData() &&
std::find(Vector->begin(), Vector->end(),
ParentStack.back()) != Vector->end();
if (!Found)
Vector->push_back(ParentStack.back());
}
}
template <typename T, typename MapNodeTy, typename BaseTraverseFn,
typename MapTy>
bool TraverseNode(T Node, MapNodeTy MapNode, BaseTraverseFn BaseTraverse,
MapTy *Parents) {
if (!Node)
return true;
if (ParentStack.size() > 0) {
// FIXME: Currently we add the same parent multiple times, but only
// when no memoization data is available for the type.
// For example when we visit all subexpressions of template
// instantiations; this is suboptimal, but benign: the only way to
// visit those is with hasAncestor / hasParent, and those do not create
// new matches.
// The plan is to enable DynTypedNode to be storable in a map or hash
// map. The main problem there is to implement hash functions /
// comparison operators for all types that DynTypedNode supports that
// do not have pointer identity.
auto &NodeOrVector = (*Parents)[MapNode];
if (NodeOrVector.isNull()) {
if (const auto *D = ParentStack.back().get<Decl>())
NodeOrVector = D;
else if (const auto *S = ParentStack.back().get<Stmt>())
NodeOrVector = S;
else
NodeOrVector = new DynTypedNode(ParentStack.back());
} else {
if (!NodeOrVector.template is<ParentVector *>()) {
auto *Vector = new ParentVector(
1, getSingleDynTypedNodeFromParentMap(NodeOrVector));
delete NodeOrVector.template dyn_cast<DynTypedNode *>();
NodeOrVector = Vector;
}
auto *Vector = NodeOrVector.template get<ParentVector *>();
// Skip duplicates for types that have memoization data.
// We must check that the type has memoization data before calling
// std::find() because DynTypedNode::operator== can't compare all
// types.
bool Found = ParentStack.back().getMemoizationData() &&
std::find(Vector->begin(), Vector->end(),
ParentStack.back()) != Vector->end();
if (!Found)
Vector->push_back(ParentStack.back());
}
}
addParent(MapNode, Parents);
ParentStack.push_back(createDynTypedNode(Node));
bool Result = BaseTraverse();
ParentStack.pop_back();
@ -283,20 +290,12 @@ private:
DeclNode, DeclNode, [&] { return VisitorBase::TraverseDecl(DeclNode); },
&Map.PointerParents);
}
bool TraverseStmt(Stmt *StmtNode) {
return TraverseNode(StmtNode, StmtNode,
[&] { return VisitorBase::TraverseStmt(StmtNode); },
&Map.PointerParents);
}
bool TraverseTypeLoc(TypeLoc TypeLocNode) {
return TraverseNode(
TypeLocNode, DynTypedNode::create(TypeLocNode),
[&] { return VisitorBase::TraverseTypeLoc(TypeLocNode); },
&Map.OtherParents);
}
bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNSLocNode) {
return TraverseNode(
NNSLocNode, DynTypedNode::create(NNSLocNode),
@ -304,6 +303,17 @@ private:
&Map.OtherParents);
}
// Using generic TraverseNode for Stmt would prevent data-recursion.
bool dataTraverseStmtPre(Stmt *StmtNode) {
addParent(StmtNode, &Map.PointerParents);
ParentStack.push_back(DynTypedNode::create(*StmtNode));
return true;
}
bool dataTraverseStmtPost(Stmt *StmtNode) {
ParentStack.pop_back();
return true;
}
ParentMap &Map;
llvm::SmallVector<DynTypedNode, 16> ParentStack;
};