[clangd] Support extraction of binary "subexpressions" like a + [[b + c]].

Summary:
These aren't formally subexpressions in C++, in this case + is left-associative.
However informally +, *, etc are usually (mathematically) associative and users
consider these subexpressions.

We detect these and in simple cases support extracting the partial expression.
As well as builtin associative operators, we assume that overloads of them
are associative and support those too.

Reviewers: SureYeaah

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D65139

llvm-svn: 367121
This commit is contained in:
Sam McCall 2019-07-26 15:29:52 +00:00
parent 736e8aa8ed
commit 91e8eac73a
5 changed files with 254 additions and 17 deletions

View File

@ -452,5 +452,12 @@ const DeclContext& SelectionTree::Node::getDeclContext() const {
llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
}
const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
if (Children.size() == 1 &&
Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
return Children.front()->ignoreImplicit();
return *this;
}
} // namespace clangd
} // namespace clang

View File

@ -103,6 +103,9 @@ public:
const DeclContext& getDeclContext() const;
// Printable node kind, like "CXXRecordDecl" or "AutoTypeLoc".
std::string kind() const;
// If this node is a wrapper with no syntax (e.g. implicit cast), return
// its contents. (If multiple wrappers are present, unwraps all of them).
const Node& ignoreImplicit() const;
};
// The most specific common ancestor of all the selected nodes.
// Returns nullptr if the common ancestor is the root.

View File

@ -27,6 +27,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace clangd {
@ -39,10 +40,14 @@ public:
const clang::Expr *getExpr() const { return Expr; }
const SelectionTree::Node *getExprNode() const { return ExprNode; }
bool isExtractable() const { return Extractable; }
// The half-open range for the expression to be extracted.
SourceRange getExtractionChars() const;
// Generate Replacement for replacing selected expression with given VarName
tooling::Replacement replaceWithVar(llvm::StringRef VarName) const;
tooling::Replacement replaceWithVar(SourceRange Chars,
llvm::StringRef VarName) const;
// Generate Replacement for declaring the selected Expr as a new variable
tooling::Replacement insertDeclaration(llvm::StringRef VarName) const;
tooling::Replacement insertDeclaration(llvm::StringRef VarName,
SourceRange InitChars) const;
private:
bool Extractable = false;
@ -152,23 +157,20 @@ const clang::Stmt *ExtractionContext::computeInsertionPoint() const {
}
return nullptr;
}
// returns the replacement for substituting the extraction with VarName
tooling::Replacement
ExtractionContext::replaceWithVar(llvm::StringRef VarName) const {
const llvm::Optional<SourceRange> ExtractionRng =
toHalfOpenFileRange(SM, Ctx.getLangOpts(), getExpr()->getSourceRange());
unsigned ExtractionLength = SM.getFileOffset(ExtractionRng->getEnd()) -
SM.getFileOffset(ExtractionRng->getBegin());
return tooling::Replacement(SM, ExtractionRng->getBegin(), ExtractionLength,
VarName);
ExtractionContext::replaceWithVar(SourceRange Chars,
llvm::StringRef VarName) const {
unsigned ExtractionLength =
SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin());
return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName);
}
// returns the Replacement for declaring a new variable storing the extraction
tooling::Replacement
ExtractionContext::insertDeclaration(llvm::StringRef VarName) const {
const llvm::Optional<SourceRange> ExtractionRng =
toHalfOpenFileRange(SM, Ctx.getLangOpts(), getExpr()->getSourceRange());
assert(ExtractionRng && "ExtractionRng should not be null");
llvm::StringRef ExtractionCode = toSourceCode(SM, *ExtractionRng);
ExtractionContext::insertDeclaration(llvm::StringRef VarName,
SourceRange InitializerChars) const {
llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars);
const SourceLocation InsertionLoc =
toHalfOpenFileRange(SM, Ctx.getLangOpts(),
InsertionPoint->getSourceRange())
@ -179,6 +181,144 @@ ExtractionContext::insertDeclaration(llvm::StringRef VarName) const {
return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl);
}
// Helpers for handling "binary subexpressions" like a + [[b + c]] + d.
//
// These are special, because the formal AST doesn't match what users expect:
// - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`.
// - but extracting `b + c` is reasonable, as + is (mathematically) associative.
//
// So we try to support these cases with some restrictions:
// - the operator must be associative
// - no mixing of operators is allowed
// - we don't look inside macro expansions in the subexpressions
// - we only adjust the extracted range, so references in the unselected parts
// of the AST expression (e.g. `a`) are still considered referenced for
// the purposes of calculating the insertion point.
// FIXME: it would be nice to exclude these references, by micromanaging
// the computeReferencedDecls() calls around the binary operator tree.
// Information extracted about a binary operator encounted in a SelectionTree.
// It can represent either an overloaded or built-in operator.
struct ParsedBinaryOperator {
BinaryOperatorKind Kind;
SourceLocation ExprLoc;
llvm::SmallVector<const SelectionTree::Node*, 8> SelectedOperands;
// If N is a binary operator, populate this and return true.
bool parse(const SelectionTree::Node &N) {
SelectedOperands.clear();
if (const BinaryOperator *Op =
llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) {
Kind = Op->getOpcode();
ExprLoc = Op->getExprLoc();
SelectedOperands = N.Children;
return true;
}
if (const CXXOperatorCallExpr *Op =
llvm::dyn_cast_or_null<CXXOperatorCallExpr>(
N.ASTNode.get<Expr>())) {
if (!Op->isInfixBinaryOp())
return false;
Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator());
ExprLoc = Op->getExprLoc();
// Not all children are args, there's also the callee (operator).
for (const auto* Child : N.Children) {
const Expr *E = Child->ASTNode.get<Expr>();
assert(E && "callee and args should be Exprs!");
if (E == Op->getArg(0) || E == Op->getArg(1))
SelectedOperands.push_back(Child);
}
return true;
}
return false;
}
bool associative() const {
// Must also be left-associative, or update getBinaryOperatorRange()!
switch (Kind) {
case BO_Add:
case BO_Mul:
case BO_And:
case BO_Or:
case BO_Xor:
case BO_LAnd:
case BO_LOr:
return true;
default:
return false;
}
}
bool crossesMacroBoundary(const SourceManager &SM) {
FileID F = SM.getFileID(ExprLoc);
for (const SelectionTree::Node *Child : SelectedOperands)
if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F)
return true;
return false;
}
};
// If have an associative operator at the top level, then we must find
// the start point (rightmost in LHS) and end point (leftmost in RHS).
// We can only descend into subtrees where the operator matches.
//
// e.g. for a + [[b + c]] + d
// +
// / \
// N-> + d
// / \
// + c <- End
// / \
// a b <- Start
const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N,
const SourceManager &SM,
const LangOptions &LangOpts) {
// If N is not a suitable binary operator, bail out.
ParsedBinaryOperator Op;
if (!Op.parse(N.ignoreImplicit()) || !Op.associative() ||
Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2)
return SourceRange();
BinaryOperatorKind OuterOp = Op.Kind;
// Because the tree we're interested in contains only one operator type, and
// all eligible operators are left-associative, the shape of the tree is
// very restricted: it's a linked list along the left edges.
// This simplifies our implementation.
const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS
const SelectionTree::Node *End = Op.SelectedOperands.back(); // RHS
// End is already correct: it can't be an OuterOp (as it's left-associative).
// Start needs to be pushed down int the subtree to the right spot.
while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp &&
!Op.crossesMacroBoundary(SM)) {
assert(!Op.SelectedOperands.empty() && "got only operator on one side!");
if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected
Start = Op.SelectedOperands.back();
break;
}
// Op.LHS is (at least partially) selected, so descend into it.
Start = Op.SelectedOperands.front();
}
return SourceRange(
toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange())
->getBegin(),
toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange())
->getEnd());
}
SourceRange ExtractionContext::getExtractionChars() const {
// Special case: we're extracting an associative binary subexpression.
SourceRange BinaryOperatorRange =
getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts());
if (BinaryOperatorRange.isValid())
return BinaryOperatorRange;
// Usual case: we're extracting the whole expression.
return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange());
}
/// Extracts an expression to the variable dummy
/// Before:
/// int x = 5 + 4 * 3;
@ -218,11 +358,12 @@ Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) {
tooling::Replacements Result;
// FIXME: get variable name from user or suggest based on type
std::string VarName = "dummy";
SourceRange Range = Target->getExtractionChars();
// insert new variable declaration
if (auto Err = Result.add(Target->insertDeclaration(VarName)))
if (auto Err = Result.add(Target->insertDeclaration(VarName, Range)))
return std::move(Err);
// replace expression with variable name
if (auto Err = Result.add(Target->replaceWithVar(VarName)))
if (auto Err = Result.add(Target->replaceWithVar(Range, VarName)))
return std::move(Err);
return Effect::applyEdit(Result);
}

View File

@ -343,6 +343,23 @@ TEST(SelectionTest, Selected) {
}
}
TEST(SelectionTest, Implicit) {
const char* Test = R"cpp(
struct S { S(const char*); };
int f(S);
int x = f("^");
)cpp";
auto AST = TestTU::withCode(Annotations(Test).code()).build();
auto T = makeSelectionTree(Test, AST);
const SelectionTree::Node *Str = T.commonAncestor();
EXPECT_EQ("StringLiteral", nodeKind(Str)) << "Implicit selected?";
EXPECT_EQ("ImplicitCastExpr", nodeKind(Str->Parent));
EXPECT_EQ("CXXConstructExpr", nodeKind(Str->Parent->Parent));
EXPECT_EQ(Str, &Str->Parent->Parent->ignoreImplicit())
<< "Didn't unwrap " << nodeKind(&Str->Parent->Parent->ignoreImplicit());
}
} // namespace
} // namespace clangd
} // namespace clang

View File

@ -476,10 +476,79 @@ TEST(TweakTest, ExtractVariable) {
R"cpp(int f() {
auto dummy = f(); return dummy;
})cpp"},
// FIXME: Wrong result for \[\[clang::uninitialized\]\] int b = [[1]];
// since the attr is inside the DeclStmt and the bounds of
// DeclStmt don't cover the attribute.
// Binary subexpressions
{R"cpp(void f() {
int x = 1 + [[2 + 3 + 4]] + 5;
})cpp",
R"cpp(void f() {
auto dummy = 2 + 3 + 4; int x = 1 + dummy + 5;
})cpp"},
{R"cpp(void f() {
int x = [[1 + 2 + 3]] + 4 + 5;
})cpp",
R"cpp(void f() {
auto dummy = 1 + 2 + 3; int x = dummy + 4 + 5;
})cpp"},
{R"cpp(void f() {
int x = 1 + 2 + [[3 + 4 + 5]];
})cpp",
R"cpp(void f() {
auto dummy = 3 + 4 + 5; int x = 1 + 2 + dummy;
})cpp"},
// Non-associative operations have no special support
{R"cpp(void f() {
int x = 1 - [[2 - 3 - 4]] - 5;
})cpp",
R"cpp(void f() {
auto dummy = 1 - 2 - 3 - 4; int x = dummy - 5;
})cpp"},
// A mix of associative operators isn't associative.
{R"cpp(void f() {
int x = 0 + 1 * [[2 + 3]] * 4 + 5;
})cpp",
R"cpp(void f() {
auto dummy = 1 * 2 + 3 * 4; int x = 0 + dummy + 5;
})cpp"},
// Overloaded operators are supported, we assume associativity
// as if they were built-in.
{R"cpp(struct S {
S(int);
};
S operator+(S, S);
void f() {
S x = S(1) + [[S(2) + S(3) + S(4)]] + S(5);
})cpp",
R"cpp(struct S {
S(int);
};
S operator+(S, S);
void f() {
auto dummy = S(2) + S(3) + S(4); S x = S(1) + dummy + S(5);
})cpp"},
// Don't try to analyze across macro boundaries
// FIXME: it'd be nice to do this someday (in a safe way)
{R"cpp(#define ECHO(X) X
void f() {
int x = 1 + [[ECHO(2 + 3) + 4]] + 5;
})cpp",
R"cpp(#define ECHO(X) X
void f() {
auto dummy = 1 + ECHO(2 + 3) + 4; int x = dummy + 5;
})cpp"},
{R"cpp(#define ECHO(X) X
void f() {
int x = 1 + [[ECHO(2) + ECHO(3) + 4]] + 5;
})cpp",
R"cpp(#define ECHO(X) X
void f() {
auto dummy = 1 + ECHO(2) + ECHO(3) + 4; int x = dummy + 5;
})cpp"},
};
for (const auto &IO : InputOutputs) {
checkTransform(ID, IO.first, IO.second);