llvm-project/clang/lib/ASTMatchers/ASTMatchFinder.cpp

1001 lines
36 KiB
C++
Raw Normal View History

//===--- ASTMatchFinder.cpp - Structural query framework ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implements an algorithm to efficiently search for matches on AST nodes.
// Uses memoization to support recursive matches like HasDescendant.
//
// The general idea is to visit all AST nodes with a RecursiveASTVisitor,
// calling the Matches(...) method of each matcher we are running on each
// AST node. The matcher can recurse via the ASTMatchFinder interface.
//
//===----------------------------------------------------------------------===//
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Timer.h"
#include <deque>
#include <memory>
#include <set>
namespace clang {
namespace ast_matchers {
namespace internal {
namespace {
typedef MatchFinder::MatchCallback MatchCallback;
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
// The maximum number of memoization entries to store.
// 10k has been experimentally found to give a good trade-off
// of performance vs. memory consumption by running matcher
// that match on every statement over a very large codebase.
//
// FIXME: Do some performance optimization in general and
// revisit this number; also, put up micro-benchmarks that we can
// optimize this on.
static const unsigned MaxMemoizationEntries = 10000;
// We use memoization to avoid running the same matcher on the same
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
// AST node twice. This struct is the key for looking up match
// result. It consists of an ID of the MatcherInterface (for
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
// identifying the matcher), a pointer to the AST node and the
// bound nodes before the matcher was executed.
//
// We currently only memoize on nodes whose pointers identify the
// nodes (\c Stmt and \c Decl, but not \c QualType or \c TypeLoc).
// For \c QualType and \c TypeLoc it is possible to implement
// generation of keys for each type.
// FIXME: Benchmark whether memoization of non-pointer typed nodes
// provides enough benefit for the additional amount of code.
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
struct MatchKey {
DynTypedMatcher::MatcherIDType MatcherID;
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
ast_type_traits::DynTypedNode Node;
BoundNodesTreeBuilder BoundNodes;
bool operator<(const MatchKey &Other) const {
return std::tie(MatcherID, Node, BoundNodes) <
std::tie(Other.MatcherID, Other.Node, Other.BoundNodes);
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
}
};
// Used to store the result of a match and possibly bound nodes.
struct MemoizedMatchResult {
bool ResultOfMatch;
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
BoundNodesTreeBuilder Nodes;
};
// A RecursiveASTVisitor that traverses all children or all descendants of
// a node.
class MatchChildASTVisitor
: public RecursiveASTVisitor<MatchChildASTVisitor> {
public:
typedef RecursiveASTVisitor<MatchChildASTVisitor> VisitorBase;
// Creates an AST visitor that matches 'matcher' on all children or
// descendants of a traversed node. max_depth is the maximum depth
// to traverse: use 1 for matching the children and INT_MAX for
// matching the descendants.
MatchChildASTVisitor(const DynTypedMatcher *Matcher,
ASTMatchFinder *Finder,
BoundNodesTreeBuilder *Builder,
int MaxDepth,
ASTMatchFinder::TraversalKind Traversal,
ASTMatchFinder::BindKind Bind)
: Matcher(Matcher),
Finder(Finder),
Builder(Builder),
CurrentDepth(0),
MaxDepth(MaxDepth),
Traversal(Traversal),
Bind(Bind),
Matches(false) {}
// Returns true if a match is found in the subtree rooted at the
// given AST node. This is done via a set of mutually recursive
// functions. Here's how the recursion is done (the *wildcard can
// actually be Decl, Stmt, or Type):
//
// - Traverse(node) calls BaseTraverse(node) when it needs
// to visit the descendants of node.
// - BaseTraverse(node) then calls (via VisitorBase::Traverse*(node))
// Traverse*(c) for each child c of 'node'.
// - Traverse*(c) in turn calls Traverse(c), completing the
// recursion.
bool findMatch(const ast_type_traits::DynTypedNode &DynNode) {
reset();
if (const Decl *D = DynNode.get<Decl>())
traverse(*D);
else if (const Stmt *S = DynNode.get<Stmt>())
traverse(*S);
else if (const NestedNameSpecifier *NNS =
DynNode.get<NestedNameSpecifier>())
traverse(*NNS);
else if (const NestedNameSpecifierLoc *NNSLoc =
DynNode.get<NestedNameSpecifierLoc>())
traverse(*NNSLoc);
else if (const QualType *Q = DynNode.get<QualType>())
traverse(*Q);
else if (const TypeLoc *T = DynNode.get<TypeLoc>())
traverse(*T);
// FIXME: Add other base types after adding tests.
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
// It's OK to always overwrite the bound nodes, as if there was
// no match in this recursive branch, the result set is empty
// anyway.
*Builder = ResultBindings;
return Matches;
}
// The following are overriding methods from the base visitor class.
// They are public only to allow CRTP to work. They are *not *part
// of the public API of this class.
bool TraverseDecl(Decl *DeclNode) {
ScopedIncrement ScopedDepth(&CurrentDepth);
return (DeclNode == nullptr) || traverse(*DeclNode);
}
bool TraverseStmt(Stmt *StmtNode) {
ScopedIncrement ScopedDepth(&CurrentDepth);
const Stmt *StmtToTraverse = StmtNode;
if (Traversal ==
ASTMatchFinder::TK_IgnoreImplicitCastsAndParentheses) {
const Expr *ExprNode = dyn_cast_or_null<Expr>(StmtNode);
if (ExprNode) {
StmtToTraverse = ExprNode->IgnoreParenImpCasts();
}
}
return (StmtToTraverse == nullptr) || traverse(*StmtToTraverse);
}
// We assume that the QualType and the contained type are on the same
// hierarchy level. Thus, we try to match either of them.
bool TraverseType(QualType TypeNode) {
if (TypeNode.isNull())
return true;
ScopedIncrement ScopedDepth(&CurrentDepth);
// Match the Type.
if (!match(*TypeNode))
return false;
// The QualType is matched inside traverse.
return traverse(TypeNode);
}
// We assume that the TypeLoc, contained QualType and contained Type all are
// on the same hierarchy level. Thus, we try to match all of them.
bool TraverseTypeLoc(TypeLoc TypeLocNode) {
if (TypeLocNode.isNull())
return true;
ScopedIncrement ScopedDepth(&CurrentDepth);
// Match the Type.
if (!match(*TypeLocNode.getType()))
return false;
// Match the QualType.
if (!match(TypeLocNode.getType()))
return false;
// The TypeLoc is matched inside traverse.
return traverse(TypeLocNode);
}
bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) {
ScopedIncrement ScopedDepth(&CurrentDepth);
return (NNS == nullptr) || traverse(*NNS);
}
bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
if (!NNS)
return true;
ScopedIncrement ScopedDepth(&CurrentDepth);
if (!match(*NNS.getNestedNameSpecifier()))
return false;
return traverse(NNS);
}
bool shouldVisitTemplateInstantiations() const { return true; }
bool shouldVisitImplicitCode() const { return true; }
// Disables data recursion. We intercept Traverse* methods in the RAV, which
// are not triggered during data recursion.
bool shouldUseDataRecursionFor(clang::Stmt *S) const { return false; }
private:
// Used for updating the depth during traversal.
struct ScopedIncrement {
explicit ScopedIncrement(int *Depth) : Depth(Depth) { ++(*Depth); }
~ScopedIncrement() { --(*Depth); }
private:
int *Depth;
};
// Resets the state of this object.
void reset() {
Matches = false;
CurrentDepth = 0;
}
// Forwards the call to the corresponding Traverse*() method in the
// base visitor class.
bool baseTraverse(const Decl &DeclNode) {
return VisitorBase::TraverseDecl(const_cast<Decl*>(&DeclNode));
}
bool baseTraverse(const Stmt &StmtNode) {
return VisitorBase::TraverseStmt(const_cast<Stmt*>(&StmtNode));
}
bool baseTraverse(QualType TypeNode) {
return VisitorBase::TraverseType(TypeNode);
}
bool baseTraverse(TypeLoc TypeLocNode) {
return VisitorBase::TraverseTypeLoc(TypeLocNode);
}
bool baseTraverse(const NestedNameSpecifier &NNS) {
return VisitorBase::TraverseNestedNameSpecifier(
const_cast<NestedNameSpecifier*>(&NNS));
}
bool baseTraverse(NestedNameSpecifierLoc NNS) {
return VisitorBase::TraverseNestedNameSpecifierLoc(NNS);
}
// Sets 'Matched' to true if 'Matcher' matches 'Node' and:
// 0 < CurrentDepth <= MaxDepth.
//
// Returns 'true' if traversal should continue after this function
// returns, i.e. if no match is found or 'Bind' is 'BK_All'.
template <typename T>
bool match(const T &Node) {
if (CurrentDepth == 0 || CurrentDepth > MaxDepth) {
return true;
}
if (Bind != ASTMatchFinder::BK_All) {
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
BoundNodesTreeBuilder RecursiveBuilder(*Builder);
if (Matcher->matches(ast_type_traits::DynTypedNode::create(Node), Finder,
&RecursiveBuilder)) {
Matches = true;
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
ResultBindings.addMatch(RecursiveBuilder);
return false; // Abort as soon as a match is found.
}
} else {
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
BoundNodesTreeBuilder RecursiveBuilder(*Builder);
if (Matcher->matches(ast_type_traits::DynTypedNode::create(Node), Finder,
&RecursiveBuilder)) {
// After the first match the matcher succeeds.
Matches = true;
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
ResultBindings.addMatch(RecursiveBuilder);
}
}
return true;
}
// Traverses the subtree rooted at 'Node'; returns true if the
// traversal should continue after this function returns.
template <typename T>
bool traverse(const T &Node) {
static_assert(IsBaseType<T>::value,
"traverse can only be instantiated with base type");
if (!match(Node))
return false;
return baseTraverse(Node);
}
const DynTypedMatcher *const Matcher;
ASTMatchFinder *const Finder;
BoundNodesTreeBuilder *const Builder;
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
BoundNodesTreeBuilder ResultBindings;
int CurrentDepth;
const int MaxDepth;
const ASTMatchFinder::TraversalKind Traversal;
const ASTMatchFinder::BindKind Bind;
bool Matches;
};
// Controls the outermost traversal of the AST and allows to match multiple
// matchers.
class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
public ASTMatchFinder {
public:
MatchASTVisitor(const MatchFinder::MatchersByType *Matchers,
const MatchFinder::MatchFinderOptions &Options)
: Matchers(Matchers), Options(Options), ActiveASTContext(nullptr) {}
~MatchASTVisitor() override {
if (Options.CheckProfiling) {
Options.CheckProfiling->Records = std::move(TimeByBucket);
}
}
void onStartOfTranslationUnit() {
const bool EnableCheckProfiling = Options.CheckProfiling.hasValue();
TimeBucketRegion Timer;
for (MatchCallback *MC : Matchers->AllCallbacks) {
if (EnableCheckProfiling)
Timer.setBucket(&TimeByBucket[MC->getID()]);
MC->onStartOfTranslationUnit();
}
}
void onEndOfTranslationUnit() {
const bool EnableCheckProfiling = Options.CheckProfiling.hasValue();
TimeBucketRegion Timer;
for (MatchCallback *MC : Matchers->AllCallbacks) {
if (EnableCheckProfiling)
Timer.setBucket(&TimeByBucket[MC->getID()]);
MC->onEndOfTranslationUnit();
}
}
void set_active_ast_context(ASTContext *NewActiveASTContext) {
ActiveASTContext = NewActiveASTContext;
}
// The following Visit*() and Traverse*() functions "override"
// methods in RecursiveASTVisitor.
bool VisitTypedefNameDecl(TypedefNameDecl *DeclNode) {
// When we see 'typedef A B', we add name 'B' to the set of names
// A's canonical type maps to. This is necessary for implementing
// isDerivedFrom(x) properly, where x can be the name of the base
// class or any of its aliases.
//
// In general, the is-alias-of (as defined by typedefs) relation
// is tree-shaped, as you can typedef a type more than once. For
// example,
//
// typedef A B;
// typedef A C;
// typedef C D;
// typedef C E;
//
// gives you
//
// A
// |- B
// `- C
// |- D
// `- E
//
// It is wrong to assume that the relation is a chain. A correct
// implementation of isDerivedFrom() needs to recognize that B and
// E are aliases, even though neither is a typedef of the other.
// Therefore, we cannot simply walk through one typedef chain to
// find out whether the type name matches.
const Type *TypeNode = DeclNode->getUnderlyingType().getTypePtr();
const Type *CanonicalType = // root of the typedef tree
ActiveASTContext->getCanonicalType(TypeNode);
TypeAliases[CanonicalType].insert(DeclNode);
return true;
}
bool TraverseDecl(Decl *DeclNode);
bool TraverseStmt(Stmt *StmtNode);
bool TraverseType(QualType TypeNode);
bool TraverseTypeLoc(TypeLoc TypeNode);
bool TraverseNestedNameSpecifier(NestedNameSpecifier *NNS);
bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS);
// Matches children or descendants of 'Node' with 'BaseMatcher'.
bool memoizedMatchesRecursively(const ast_type_traits::DynTypedNode &Node,
const DynTypedMatcher &Matcher,
BoundNodesTreeBuilder *Builder, int MaxDepth,
TraversalKind Traversal, BindKind Bind) {
// For AST-nodes that don't have an identity, we can't memoize.
if (!Node.getMemoizationData() || !Builder->isComparable())
return matchesRecursively(Node, Matcher, Builder, MaxDepth, Traversal,
Bind);
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
MatchKey Key;
Key.MatcherID = Matcher.getID();
Key.Node = Node;
// Note that we key on the bindings *before* the match.
Key.BoundNodes = *Builder;
MemoizationMap::iterator I = ResultCache.find(Key);
if (I != ResultCache.end()) {
*Builder = I->second.Nodes;
return I->second.ResultOfMatch;
}
MemoizedMatchResult Result;
Result.Nodes = *Builder;
Result.ResultOfMatch = matchesRecursively(Node, Matcher, &Result.Nodes,
MaxDepth, Traversal, Bind);
MemoizedMatchResult &CachedResult = ResultCache[Key];
CachedResult = std::move(Result);
*Builder = CachedResult.Nodes;
return CachedResult.ResultOfMatch;
}
// Matches children or descendants of 'Node' with 'BaseMatcher'.
bool matchesRecursively(const ast_type_traits::DynTypedNode &Node,
const DynTypedMatcher &Matcher,
BoundNodesTreeBuilder *Builder, int MaxDepth,
TraversalKind Traversal, BindKind Bind) {
MatchChildASTVisitor Visitor(
&Matcher, this, Builder, MaxDepth, Traversal, Bind);
return Visitor.findMatch(Node);
}
bool classIsDerivedFrom(const CXXRecordDecl *Declaration,
const Matcher<NamedDecl> &Base,
BoundNodesTreeBuilder *Builder) override;
// Implements ASTMatchFinder::matchesChildOf.
bool matchesChildOf(const ast_type_traits::DynTypedNode &Node,
const DynTypedMatcher &Matcher,
BoundNodesTreeBuilder *Builder,
TraversalKind Traversal,
BindKind Bind) override {
if (ResultCache.size() > MaxMemoizationEntries)
ResultCache.clear();
return memoizedMatchesRecursively(Node, Matcher, Builder, 1, Traversal,
Bind);
}
// Implements ASTMatchFinder::matchesDescendantOf.
bool matchesDescendantOf(const ast_type_traits::DynTypedNode &Node,
const DynTypedMatcher &Matcher,
BoundNodesTreeBuilder *Builder,
BindKind Bind) override {
if (ResultCache.size() > MaxMemoizationEntries)
ResultCache.clear();
return memoizedMatchesRecursively(Node, Matcher, Builder, INT_MAX,
TK_AsIs, Bind);
}
// Implements ASTMatchFinder::matchesAncestorOf.
bool matchesAncestorOf(const ast_type_traits::DynTypedNode &Node,
const DynTypedMatcher &Matcher,
BoundNodesTreeBuilder *Builder,
AncestorMatchMode MatchMode) override {
// Reset the cache outside of the recursive call to make sure we
// don't invalidate any iterators.
if (ResultCache.size() > MaxMemoizationEntries)
ResultCache.clear();
return memoizedMatchesAncestorOfRecursively(Node, Matcher, Builder,
MatchMode);
}
// Matches all registered matchers on the given node and calls the
// result callback for every node that matches.
void match(const ast_type_traits::DynTypedNode &Node) {
// FIXME: Improve this with a switch or a visitor pattern.
if (auto *N = Node.get<Decl>()) {
match(*N);
} else if (auto *N = Node.get<Stmt>()) {
match(*N);
} else if (auto *N = Node.get<Type>()) {
match(*N);
} else if (auto *N = Node.get<QualType>()) {
match(*N);
} else if (auto *N = Node.get<NestedNameSpecifier>()) {
match(*N);
} else if (auto *N = Node.get<NestedNameSpecifierLoc>()) {
match(*N);
} else if (auto *N = Node.get<TypeLoc>()) {
match(*N);
}
}
template <typename T> void match(const T &Node) {
matchDispatch(&Node);
}
// Implements ASTMatchFinder::getASTContext.
ASTContext &getASTContext() const override { return *ActiveASTContext; }
bool shouldVisitTemplateInstantiations() const { return true; }
bool shouldVisitImplicitCode() const { return true; }
// Disables data recursion. We intercept Traverse* methods in the RAV, which
// are not triggered during data recursion.
bool shouldUseDataRecursionFor(clang::Stmt *S) const { return false; }
private:
class TimeBucketRegion {
public:
TimeBucketRegion() : Bucket(nullptr) {}
~TimeBucketRegion() { setBucket(nullptr); }
/// \brief Start timing for \p NewBucket.
///
/// If there was a bucket already set, it will finish the timing for that
/// other bucket.
/// \p NewBucket will be timed until the next call to \c setBucket() or
/// until the \c TimeBucketRegion is destroyed.
/// If \p NewBucket is the same as the currently timed bucket, this call
/// does nothing.
void setBucket(llvm::TimeRecord *NewBucket) {
if (Bucket != NewBucket) {
auto Now = llvm::TimeRecord::getCurrentTime(true);
if (Bucket)
*Bucket += Now;
if (NewBucket)
*NewBucket -= Now;
Bucket = NewBucket;
}
}
private:
llvm::TimeRecord *Bucket;
};
/// \brief Runs all the \p Matchers on \p Node.
///
/// Used by \c matchDispatch() below.
template <typename T, typename MC>
void matchWithoutFilter(const T &Node, const MC &Matchers) {
const bool EnableCheckProfiling = Options.CheckProfiling.hasValue();
TimeBucketRegion Timer;
for (const auto &MP : Matchers) {
if (EnableCheckProfiling)
Timer.setBucket(&TimeByBucket[MP.second->getID()]);
BoundNodesTreeBuilder Builder;
if (MP.first.matches(Node, this, &Builder)) {
MatchVisitor Visitor(ActiveASTContext, MP.second);
Builder.visitMatches(&Visitor);
}
}
}
void matchWithFilter(const ast_type_traits::DynTypedNode &DynNode) {
auto Kind = DynNode.getNodeKind();
auto it = MatcherFiltersMap.find(Kind);
const auto &Filter =
it != MatcherFiltersMap.end() ? it->second : getFilterForKind(Kind);
if (Filter.empty())
return;
const bool EnableCheckProfiling = Options.CheckProfiling.hasValue();
TimeBucketRegion Timer;
auto &Matchers = this->Matchers->DeclOrStmt;
for (unsigned short I : Filter) {
auto &MP = Matchers[I];
if (EnableCheckProfiling)
Timer.setBucket(&TimeByBucket[MP.second->getID()]);
BoundNodesTreeBuilder Builder;
if (MP.first.matchesNoKindCheck(DynNode, this, &Builder)) {
MatchVisitor Visitor(ActiveASTContext, MP.second);
Builder.visitMatches(&Visitor);
}
}
}
const std::vector<unsigned short> &
getFilterForKind(ast_type_traits::ASTNodeKind Kind) {
auto &Filter = MatcherFiltersMap[Kind];
auto &Matchers = this->Matchers->DeclOrStmt;
assert((Matchers.size() < USHRT_MAX) && "Too many matchers.");
for (unsigned I = 0, E = Matchers.size(); I != E; ++I) {
if (Matchers[I].first.canMatchNodesOfKind(Kind)) {
Filter.push_back(I);
}
}
return Filter;
}
/// @{
/// \brief Overloads to pair the different node types to their matchers.
void matchDispatch(const Decl *Node) {
return matchWithFilter(ast_type_traits::DynTypedNode::create(*Node));
}
void matchDispatch(const Stmt *Node) {
return matchWithFilter(ast_type_traits::DynTypedNode::create(*Node));
}
void matchDispatch(const Type *Node) {
matchWithoutFilter(QualType(Node, 0), Matchers->Type);
}
void matchDispatch(const TypeLoc *Node) {
matchWithoutFilter(*Node, Matchers->TypeLoc);
}
void matchDispatch(const QualType *Node) {
matchWithoutFilter(*Node, Matchers->Type);
}
void matchDispatch(const NestedNameSpecifier *Node) {
matchWithoutFilter(*Node, Matchers->NestedNameSpecifier);
}
void matchDispatch(const NestedNameSpecifierLoc *Node) {
matchWithoutFilter(*Node, Matchers->NestedNameSpecifierLoc);
}
void matchDispatch(const void *) { /* Do nothing. */ }
/// @}
// Returns whether an ancestor of \p Node matches \p Matcher.
//
// The order of matching ((which can lead to different nodes being bound in
// case there are multiple matches) is breadth first search.
//
// To allow memoization in the very common case of having deeply nested
// expressions inside a template function, we first walk up the AST, memoizing
// the result of the match along the way, as long as there is only a single
// parent.
//
// Once there are multiple parents, the breadth first search order does not
// allow simple memoization on the ancestors. Thus, we only memoize as long
// as there is a single parent.
bool memoizedMatchesAncestorOfRecursively(
const ast_type_traits::DynTypedNode &Node, const DynTypedMatcher &Matcher,
BoundNodesTreeBuilder *Builder, AncestorMatchMode MatchMode) {
if (Node.get<TranslationUnitDecl>() ==
ActiveASTContext->getTranslationUnitDecl())
return false;
assert(Node.getMemoizationData() &&
"Invariant broken: only nodes that support memoization may be "
"used in the parent map.");
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
MatchKey Key;
Key.MatcherID = Matcher.getID();
Key.Node = Node;
Key.BoundNodes = *Builder;
// Note that we cannot use insert and reuse the iterator, as recursive
// calls to match might invalidate the result cache iterators.
MemoizationMap::iterator I = ResultCache.find(Key);
if (I != ResultCache.end()) {
*Builder = I->second.Nodes;
return I->second.ResultOfMatch;
}
MemoizedMatchResult Result;
Result.ResultOfMatch = false;
Result.Nodes = *Builder;
const auto &Parents = ActiveASTContext->getParents(Node);
assert(!Parents.empty() && "Found node that is not in the parent map.");
if (Parents.size() == 1) {
// Only one parent - do recursive memoization.
const ast_type_traits::DynTypedNode Parent = Parents[0];
if (Matcher.matches(Parent, this, &Result.Nodes)) {
Result.ResultOfMatch = true;
} else if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
// Reset the results to not include the bound nodes from the failed
// match above.
Result.Nodes = *Builder;
Result.ResultOfMatch = memoizedMatchesAncestorOfRecursively(
Parent, Matcher, &Result.Nodes, MatchMode);
// Once we get back from the recursive call, the result will be the
// same as the parent's result.
}
} else {
// Multiple parents - BFS over the rest of the nodes.
llvm::DenseSet<const void *> Visited;
std::deque<ast_type_traits::DynTypedNode> Queue(Parents.begin(),
Parents.end());
while (!Queue.empty()) {
Result.Nodes = *Builder;
if (Matcher.matches(Queue.front(), this, &Result.Nodes)) {
Result.ResultOfMatch = true;
break;
}
if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
for (const auto &Parent :
ActiveASTContext->getParents(Queue.front())) {
// Make sure we do not visit the same node twice.
// Otherwise, we'll visit the common ancestors as often as there
// are splits on the way down.
if (Visited.insert(Parent.getMemoizationData()).second)
Queue.push_back(Parent);
}
}
Queue.pop_front();
}
}
MemoizedMatchResult &CachedResult = ResultCache[Key];
CachedResult = std::move(Result);
*Builder = CachedResult.Nodes;
return CachedResult.ResultOfMatch;
}
// Implements a BoundNodesTree::Visitor that calls a MatchCallback with
// the aggregated bound nodes for each match.
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
class MatchVisitor : public BoundNodesTreeBuilder::Visitor {
public:
MatchVisitor(ASTContext* Context,
MatchFinder::MatchCallback* Callback)
: Context(Context),
Callback(Callback) {}
void visitMatch(const BoundNodes& BoundNodesView) override {
Callback->run(MatchFinder::MatchResult(BoundNodesView, Context));
}
private:
ASTContext* Context;
MatchFinder::MatchCallback* Callback;
};
// Returns true if 'TypeNode' has an alias that matches the given matcher.
bool typeHasMatchingAlias(const Type *TypeNode,
const Matcher<NamedDecl> Matcher,
BoundNodesTreeBuilder *Builder) {
const Type *const CanonicalType =
ActiveASTContext->getCanonicalType(TypeNode);
for (const TypedefNameDecl *Alias : TypeAliases.lookup(CanonicalType)) {
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
BoundNodesTreeBuilder Result(*Builder);
if (Matcher.matches(*Alias, this, &Result)) {
*Builder = std::move(Result);
return true;
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
}
}
return false;
}
/// \brief Bucket to record map.
///
/// Used to get the appropriate bucket for each matcher.
llvm::StringMap<llvm::TimeRecord> TimeByBucket;
const MatchFinder::MatchersByType *Matchers;
/// \brief Filtered list of matcher indices for each matcher kind.
///
/// \c Decl and \c Stmt toplevel matchers usually apply to a specific node
/// kind (and derived kinds) so it is a waste to try every matcher on every
/// node.
/// We precalculate a list of matchers that pass the toplevel restrict check.
/// This also allows us to skip the restrict check at matching time. See
/// use \c matchesNoKindCheck() above.
llvm::DenseMap<ast_type_traits::ASTNodeKind, std::vector<unsigned short>>
MatcherFiltersMap;
const MatchFinder::MatchFinderOptions &Options;
ASTContext *ActiveASTContext;
// Maps a canonical type to its TypedefDecls.
llvm::DenseMap<const Type*, std::set<const TypedefNameDecl*> > TypeAliases;
// Maps (matcher, node) -> the match result for memoization.
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
typedef std::map<MatchKey, MemoizedMatchResult> MemoizationMap;
MemoizationMap ResultCache;
};
static CXXRecordDecl *getAsCXXRecordDecl(const Type *TypeNode) {
// Type::getAs<...>() drills through typedefs.
if (TypeNode->getAs<DependentNameType>() != nullptr ||
TypeNode->getAs<DependentTemplateSpecializationType>() != nullptr ||
TypeNode->getAs<TemplateTypeParmType>() != nullptr)
// Dependent names and template TypeNode parameters will be matched when
// the template is instantiated.
return nullptr;
TemplateSpecializationType const *TemplateType =
TypeNode->getAs<TemplateSpecializationType>();
if (!TemplateType) {
return TypeNode->getAsCXXRecordDecl();
}
if (TemplateType->getTemplateName().isDependent())
// Dependent template specializations will be matched when the
// template is instantiated.
return nullptr;
// For template specialization types which are specializing a template
// declaration which is an explicit or partial specialization of another
// template declaration, getAsCXXRecordDecl() returns the corresponding
// ClassTemplateSpecializationDecl.
//
// For template specialization types which are specializing a template
// declaration which is neither an explicit nor partial specialization of
// another template declaration, getAsCXXRecordDecl() returns NULL and
// we get the CXXRecordDecl of the templated declaration.
CXXRecordDecl *SpecializationDecl = TemplateType->getAsCXXRecordDecl();
if (SpecializationDecl) {
return SpecializationDecl;
}
NamedDecl *Templated =
TemplateType->getTemplateName().getAsTemplateDecl()->getTemplatedDecl();
if (CXXRecordDecl *TemplatedRecord = dyn_cast<CXXRecordDecl>(Templated)) {
return TemplatedRecord;
}
// Now it can still be that we have an alias template.
TypeAliasDecl *AliasDecl = dyn_cast<TypeAliasDecl>(Templated);
assert(AliasDecl);
return getAsCXXRecordDecl(AliasDecl->getUnderlyingType().getTypePtr());
}
// Returns true if the given class is directly or indirectly derived
// from a base type with the given name. A class is not considered to be
// derived from itself.
bool MatchASTVisitor::classIsDerivedFrom(const CXXRecordDecl *Declaration,
const Matcher<NamedDecl> &Base,
BoundNodesTreeBuilder *Builder) {
if (!Declaration->hasDefinition())
return false;
for (const auto &It : Declaration->bases()) {
const Type *TypeNode = It.getType().getTypePtr();
if (typeHasMatchingAlias(TypeNode, Base, Builder))
return true;
CXXRecordDecl *ClassDecl = getAsCXXRecordDecl(TypeNode);
if (!ClassDecl)
continue;
if (ClassDecl == Declaration) {
// This can happen for recursive template definitions; if the
// current declaration did not match, we can safely return false.
return false;
}
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
BoundNodesTreeBuilder Result(*Builder);
if (Base.matches(*ClassDecl, this, &Result)) {
*Builder = std::move(Result);
return true;
Completely revamp node binding for AST matchers. This is in preparation for the backwards references to bound nodes, which will expose a lot more about how matches occur. Main changes: - instead of building the tree of bound nodes, we build a "set" of bound nodes and explode all possible match combinations while running through the matchers; this will allow us to also implement matchers that filter down the current set of matches, like "equalsBoundNode" - take the set of bound nodes at the start of the match into consideration when doing memoization; as part of that, reevaluated that memoization gives us benefits that are large enough (it still does - the effect on common match patterns is up to an order of magnitude) - reset the bound nodes when a node does not match, thus never leaking information from partial sub-matcher matches for failing matchers Effects: - we can now correctly "explode" combinatorial matches, for example: allOf(forEachDescendant(...bind("a")), forEachDescendant(...bind("b"))) will now trigger matches for all combinations of matching "a" and "b"s. - we now never expose bound nodes from partial matches in matchers that did not match in the end - this fixes a long-standing issue FIXMEs: - rename BoundNodesTreeBuilder to BoundNodesBuilder or BoundNodesSetBuilder, as we don't build a tree any more; this is out of scope for this change, though - we're seeing some performance regressions (around 10%), but I expect some performance tuning will get that back, and it's easily worth the increase in expressiveness for now llvm-svn: 184313
2013-06-19 23:42:45 +08:00
}
if (classIsDerivedFrom(ClassDecl, Base, Builder))
return true;
}
return false;
}
bool MatchASTVisitor::TraverseDecl(Decl *DeclNode) {
if (!DeclNode) {
return true;
}
match(*DeclNode);
return RecursiveASTVisitor<MatchASTVisitor>::TraverseDecl(DeclNode);
}
bool MatchASTVisitor::TraverseStmt(Stmt *StmtNode) {
if (!StmtNode) {
return true;
}
match(*StmtNode);
return RecursiveASTVisitor<MatchASTVisitor>::TraverseStmt(StmtNode);
}
bool MatchASTVisitor::TraverseType(QualType TypeNode) {
match(TypeNode);
return RecursiveASTVisitor<MatchASTVisitor>::TraverseType(TypeNode);
}
bool MatchASTVisitor::TraverseTypeLoc(TypeLoc TypeLocNode) {
// The RecursiveASTVisitor only visits types if they're not within TypeLocs.
// We still want to find those types via matchers, so we match them here. Note
// that the TypeLocs are structurally a shadow-hierarchy to the expressed
// type, so we visit all involved parts of a compound type when matching on
// each TypeLoc.
match(TypeLocNode);
match(TypeLocNode.getType());
return RecursiveASTVisitor<MatchASTVisitor>::TraverseTypeLoc(TypeLocNode);
}
bool MatchASTVisitor::TraverseNestedNameSpecifier(NestedNameSpecifier *NNS) {
match(*NNS);
return RecursiveASTVisitor<MatchASTVisitor>::TraverseNestedNameSpecifier(NNS);
}
bool MatchASTVisitor::TraverseNestedNameSpecifierLoc(
NestedNameSpecifierLoc NNS) {
match(NNS);
// We only match the nested name specifier here (as opposed to traversing it)
// because the traversal is already done in the parallel "Loc"-hierarchy.
if (NNS.hasQualifier())
match(*NNS.getNestedNameSpecifier());
return
RecursiveASTVisitor<MatchASTVisitor>::TraverseNestedNameSpecifierLoc(NNS);
}
class MatchASTConsumer : public ASTConsumer {
public:
MatchASTConsumer(MatchFinder *Finder,
MatchFinder::ParsingDoneTestCallback *ParsingDone)
: Finder(Finder), ParsingDone(ParsingDone) {}
private:
void HandleTranslationUnit(ASTContext &Context) override {
if (ParsingDone != nullptr) {
ParsingDone->run();
}
Finder->matchAST(Context);
}
MatchFinder *Finder;
MatchFinder::ParsingDoneTestCallback *ParsingDone;
};
} // end namespace
} // end namespace internal
MatchFinder::MatchResult::MatchResult(const BoundNodes &Nodes,
ASTContext *Context)
: Nodes(Nodes), Context(Context),
SourceManager(&Context->getSourceManager()) {}
MatchFinder::MatchCallback::~MatchCallback() = default;
MatchFinder::ParsingDoneTestCallback::~ParsingDoneTestCallback() = default;
MatchFinder::MatchFinder(MatchFinderOptions Options)
: Options(std::move(Options)), ParsingDone(nullptr) {}
MatchFinder::~MatchFinder() = default;
void MatchFinder::addMatcher(const DeclarationMatcher &NodeMatch,
MatchCallback *Action) {
Matchers.DeclOrStmt.emplace_back(NodeMatch, Action);
Matchers.AllCallbacks.insert(Action);
}
void MatchFinder::addMatcher(const TypeMatcher &NodeMatch,
MatchCallback *Action) {
Matchers.Type.emplace_back(NodeMatch, Action);
Matchers.AllCallbacks.insert(Action);
}
void MatchFinder::addMatcher(const StatementMatcher &NodeMatch,
MatchCallback *Action) {
Matchers.DeclOrStmt.emplace_back(NodeMatch, Action);
Matchers.AllCallbacks.insert(Action);
}
void MatchFinder::addMatcher(const NestedNameSpecifierMatcher &NodeMatch,
MatchCallback *Action) {
Matchers.NestedNameSpecifier.emplace_back(NodeMatch, Action);
Matchers.AllCallbacks.insert(Action);
}
void MatchFinder::addMatcher(const NestedNameSpecifierLocMatcher &NodeMatch,
MatchCallback *Action) {
Matchers.NestedNameSpecifierLoc.emplace_back(NodeMatch, Action);
Matchers.AllCallbacks.insert(Action);
}
void MatchFinder::addMatcher(const TypeLocMatcher &NodeMatch,
MatchCallback *Action) {
Matchers.TypeLoc.emplace_back(NodeMatch, Action);
Matchers.AllCallbacks.insert(Action);
}
bool MatchFinder::addDynamicMatcher(const internal::DynTypedMatcher &NodeMatch,
MatchCallback *Action) {
if (NodeMatch.canConvertTo<Decl>()) {
addMatcher(NodeMatch.convertTo<Decl>(), Action);
return true;
} else if (NodeMatch.canConvertTo<QualType>()) {
addMatcher(NodeMatch.convertTo<QualType>(), Action);
return true;
} else if (NodeMatch.canConvertTo<Stmt>()) {
addMatcher(NodeMatch.convertTo<Stmt>(), Action);
return true;
} else if (NodeMatch.canConvertTo<NestedNameSpecifier>()) {
addMatcher(NodeMatch.convertTo<NestedNameSpecifier>(), Action);
return true;
} else if (NodeMatch.canConvertTo<NestedNameSpecifierLoc>()) {
addMatcher(NodeMatch.convertTo<NestedNameSpecifierLoc>(), Action);
return true;
} else if (NodeMatch.canConvertTo<TypeLoc>()) {
addMatcher(NodeMatch.convertTo<TypeLoc>(), Action);
return true;
}
return false;
}
std::unique_ptr<ASTConsumer> MatchFinder::newASTConsumer() {
return llvm::make_unique<internal::MatchASTConsumer>(this, ParsingDone);
}
void MatchFinder::match(const clang::ast_type_traits::DynTypedNode &Node,
ASTContext &Context) {
internal::MatchASTVisitor Visitor(&Matchers, Options);
Visitor.set_active_ast_context(&Context);
Visitor.match(Node);
}
void MatchFinder::matchAST(ASTContext &Context) {
internal::MatchASTVisitor Visitor(&Matchers, Options);
Visitor.set_active_ast_context(&Context);
Visitor.onStartOfTranslationUnit();
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
Visitor.onEndOfTranslationUnit();
}
void MatchFinder::registerTestCallbackAfterParsing(
MatchFinder::ParsingDoneTestCallback *NewParsingDone) {
ParsingDone = NewParsingDone;
}
StringRef MatchFinder::MatchCallback::getID() const { return "<unknown>"; }
} // end namespace ast_matchers
} // end namespace clang