New C++ function name parsing logic

The current implementation of CPlusPlusLanguage::MethodName::Parse() doesn't
come anywhere close to covering the full extent of possible function declarations.
This causes incorrect behavior in avoid-stepping and sometimes messes up the
printing of thread backtraces.

This change implements more methodical parsing logic based on the clang
lexer and a simple recursive parser.

Examples:
void std::vector<Class, std::allocator<Class>>::_M_emplace_back_aux<Class const&>(Class const&)
void (*&std::_Any_data::_M_access<void (*)()>())()

Differential Revision: https://reviews.llvm.org/D31451

llvm-svn: 299374
This commit is contained in:
Eugene Zemtsov 2017-04-03 18:59:34 +00:00
parent 0a5e55e819
commit 699a748893
6 changed files with 976 additions and 129 deletions

View File

@ -1,6 +1,7 @@
add_lldb_library(lldbPluginCPlusPlusLanguage PLUGIN
BlockPointer.cpp
CPlusPlusLanguage.cpp
CPlusPlusNameParser.cpp
CxxStringTypes.cpp
LibCxx.cpp
LibCxxAtomic.cpp

View File

@ -21,7 +21,6 @@
// Other libraries and framework includes
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Threading.h"
// Project includes
#include "lldb/Core/PluginManager.h"
@ -36,6 +35,7 @@
#include "lldb/Utility/RegularExpression.h"
#include "BlockPointer.h"
#include "CPlusPlusNameParser.h"
#include "CxxStringTypes.h"
#include "LibCxx.h"
#include "LibCxxAtomic.h"
@ -85,15 +85,14 @@ void CPlusPlusLanguage::MethodName::Clear() {
m_context = llvm::StringRef();
m_arguments = llvm::StringRef();
m_qualifiers = llvm::StringRef();
m_type = eTypeInvalid;
m_parsed = false;
m_parse_error = false;
}
bool ReverseFindMatchingChars(const llvm::StringRef &s,
const llvm::StringRef &left_right_chars,
size_t &left_pos, size_t &right_pos,
size_t pos = llvm::StringRef::npos) {
static bool ReverseFindMatchingChars(const llvm::StringRef &s,
const llvm::StringRef &left_right_chars,
size_t &left_pos, size_t &right_pos,
size_t pos = llvm::StringRef::npos) {
assert(left_right_chars.size() == 2);
left_pos = llvm::StringRef::npos;
const char left_char = left_right_chars[0];
@ -119,10 +118,9 @@ bool ReverseFindMatchingChars(const llvm::StringRef &s,
return false;
}
static bool IsValidBasename(const llvm::StringRef &basename) {
// Check that the basename matches with the following regular expression or is
// an operator name:
// "^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$"
static bool IsTrivialBasename(const llvm::StringRef &basename) {
// Check that the basename matches with the following regular expression
// "^~?([A-Za-z_][A-Za-z_0-9]*)$"
// We are using a hand-written implementation because it is significantly more
// efficient than
// using the general purpose regular expression library.
@ -149,100 +147,69 @@ static bool IsValidBasename(const llvm::StringRef &basename) {
if (idx == basename.size())
return true;
// Check for basename with template arguments
// TODO: Improve the quality of the validation with validating the template
// arguments
if (basename[idx] == '<' && basename.back() == '>')
return true;
return false;
}
// Check if the basename is a vaild C++ operator name
if (!basename.startswith("operator"))
return false;
bool CPlusPlusLanguage::MethodName::TrySimplifiedParse() {
// This method tries to parse simple method definitions,
// which are presumably the most common in user programs.
// Definitions that can be parsed by this function have neither return types
// nor templates in the name.
// A::B::C::fun(std::vector<T> &) const
size_t arg_start, arg_end;
llvm::StringRef full(m_full.GetCString());
llvm::StringRef parens("()", 2);
if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
if (arg_end + 1 < full.size())
m_qualifiers = full.substr(arg_end + 1).ltrim();
static RegularExpression g_operator_regex(
llvm::StringRef("^(operator)( "
"?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|"
"\\[\\]|[\\^<>=!\\/"
"*+-]+)(<.*>)?(\\[\\])?$"));
std::string basename_str(basename.str());
return g_operator_regex.Execute(basename_str, nullptr);
if (arg_start == 0)
return false;
size_t basename_end = arg_start;
size_t context_start = 0;
size_t context_end = full.rfind(':', basename_end);
if (context_end == llvm::StringRef::npos)
m_basename = full.substr(0, basename_end);
else {
if (context_start < context_end)
m_context = full.substr(context_start, context_end - 1 - context_start);
const size_t basename_begin = context_end + 1;
m_basename = full.substr(basename_begin, basename_end - basename_begin);
}
if (IsTrivialBasename(m_basename)) {
return true;
} else {
// The C++ basename doesn't match our regular expressions so this can't
// be a valid C++ method, clear everything out and indicate an error
m_context = llvm::StringRef();
m_basename = llvm::StringRef();
m_arguments = llvm::StringRef();
m_qualifiers = llvm::StringRef();
return false;
}
}
return false;
}
void CPlusPlusLanguage::MethodName::Parse() {
if (!m_parsed && m_full) {
// ConstString mangled;
// m_full.GetMangledCounterpart(mangled);
// printf ("\n parsing = '%s'\n", m_full.GetCString());
// if (mangled)
// printf (" mangled = '%s'\n", mangled.GetCString());
m_parse_error = false;
m_parsed = true;
llvm::StringRef full(m_full.GetCString());
size_t arg_start, arg_end;
llvm::StringRef parens("()", 2);
if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
if (arg_end + 1 < full.size())
m_qualifiers = full.substr(arg_end + 1);
if (arg_start > 0) {
size_t basename_end = arg_start;
size_t context_start = 0;
size_t context_end = llvm::StringRef::npos;
if (basename_end > 0 && full[basename_end - 1] == '>') {
// TODO: handle template junk...
// Templated function
size_t template_start, template_end;
llvm::StringRef lt_gt("<>", 2);
if (ReverseFindMatchingChars(full, lt_gt, template_start,
template_end, basename_end)) {
// Check for templated functions that include return type like:
// 'void foo<Int>()'
context_start = full.rfind(' ', template_start);
if (context_start == llvm::StringRef::npos)
context_start = 0;
else
++context_start;
context_end = full.rfind(':', template_start);
if (context_end == llvm::StringRef::npos ||
context_end < context_start)
context_end = context_start;
} else {
context_end = full.rfind(':', basename_end);
}
} else if (context_end == llvm::StringRef::npos) {
context_end = full.rfind(':', basename_end);
}
if (context_end == llvm::StringRef::npos)
m_basename = full.substr(0, basename_end);
else {
if (context_start < context_end)
m_context =
full.substr(context_start, context_end - 1 - context_start);
const size_t basename_begin = context_end + 1;
m_basename =
full.substr(basename_begin, basename_end - basename_begin);
}
m_type = eTypeUnknownMethod;
if (TrySimplifiedParse()) {
m_parse_error = false;
} else {
CPlusPlusNameParser parser(m_full.GetStringRef());
if (auto function = parser.ParseAsFunctionDefinition()) {
m_basename = function.getValue().name.basename;
m_context = function.getValue().name.context;
m_arguments = function.getValue().arguments;
m_qualifiers = function.getValue().qualifiers;
m_parse_error = false;
} else {
m_parse_error = true;
return;
}
if (!IsValidBasename(m_basename)) {
// The C++ basename doesn't match our regular expressions so this can't
// be a valid C++ method, clear everything out and indicate an error
m_context = llvm::StringRef();
m_basename = llvm::StringRef();
m_arguments = llvm::StringRef();
m_qualifiers = llvm::StringRef();
m_parse_error = true;
}
} else {
m_parse_error = true;
}
m_parsed = true;
}
}
@ -273,14 +240,13 @@ llvm::StringRef CPlusPlusLanguage::MethodName::GetQualifiers() {
std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() {
if (!m_parsed)
Parse();
if (m_basename.empty() || m_context.empty())
return std::string();
if (m_context.empty())
return m_basename;
std::string res;
res += m_context;
res += "::";
res += m_basename;
return res;
}
@ -296,13 +262,10 @@ bool CPlusPlusLanguage::IsCPPMangledName(const char *name) {
bool CPlusPlusLanguage::ExtractContextAndIdentifier(
const char *name, llvm::StringRef &context, llvm::StringRef &identifier) {
static RegularExpression g_basename_regex(llvm::StringRef(
"^(([A-Za-z_][A-Za-z_0-9]*::)*)(~?[A-Za-z_~][A-Za-z_0-9]*)$"));
RegularExpression::Match match(4);
if (g_basename_regex.Execute(llvm::StringRef::withNullAsEmpty(name),
&match)) {
match.GetMatchAtIndex(name, 1, context);
match.GetMatchAtIndex(name, 3, identifier);
CPlusPlusNameParser parser(name);
if (auto full_name = parser.ParseAsFullName()) {
identifier = full_name.getValue().basename;
context = full_name.getValue().context;
return true;
}
return false;

View File

@ -29,20 +29,13 @@ class CPlusPlusLanguage : public Language {
public:
class MethodName {
public:
enum Type {
eTypeInvalid,
eTypeUnknownMethod,
eTypeClassMethod,
eTypeInstanceMethod
};
MethodName()
: m_full(), m_basename(), m_context(), m_arguments(), m_qualifiers(),
m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {}
m_parsed(false), m_parse_error(false) {}
MethodName(const ConstString &s)
: m_full(s), m_basename(), m_context(), m_arguments(), m_qualifiers(),
m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {}
m_parsed(false), m_parse_error(false) {}
void Clear();
@ -51,13 +44,9 @@ public:
Parse();
if (m_parse_error)
return false;
if (m_type == eTypeInvalid)
return false;
return (bool)m_full;
}
Type GetType() const { return m_type; }
const ConstString &GetFullName() const { return m_full; }
std::string GetScopeQualifiedName();
@ -72,6 +61,7 @@ public:
protected:
void Parse();
bool TrySimplifiedParse();
ConstString m_full; // Full name:
// "lldb::SBTarget::GetBreakpointAtIndex(unsigned int)
@ -80,7 +70,6 @@ public:
llvm::StringRef m_context; // Decl context: "lldb::SBTarget"
llvm::StringRef m_arguments; // Arguments: "(unsigned int)"
llvm::StringRef m_qualifiers; // Qualifiers: "const"
Type m_type;
bool m_parsed;
bool m_parse_error;
};
@ -121,7 +110,7 @@ public:
// If the name is a lone C identifier (e.g. C) or a qualified C identifier
// (e.g. A::B::C) it will return true,
// and identifier will be the identifier (C and C respectively) and the
// context will be "" and "A::B::" respectively.
// context will be "" and "A::B" respectively.
// If the name fails the heuristic matching for a qualified or unqualified
// C/C++ identifier, then it will return false
// and identifier and context will be unchanged.

View File

@ -0,0 +1,614 @@
//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "CPlusPlusNameParser.h"
#include "clang/Basic/IdentifierTable.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Threading.h"
using namespace lldb;
using namespace lldb_private;
using llvm::Optional;
using llvm::None;
using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
namespace tok = clang::tok;
// Parses the whole token stream as a function definition, trying three
// shapes in order: no return type, function-pointer return type, and a plain
// (non-function) return type. Returns None when none of them matches.
Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
  m_next_token_index = 0;

  // Shape 1: function without a return type, e.g. main(int, char*[]).
  // It is only accepted when it consumes every token; otherwise the bookmark
  // rewinds the position so the next shapes start from the beginning.
  {
    Bookmark rewind_on_exit = SetBookmark();
    Optional<ParsedFunction> without_return_type = ParseFunctionImpl(false);
    if (without_return_type && !HasMoreTokens())
      return without_return_type;
  }

  // Shape 2: function with a function pointer return type,
  // e.g. void (*get_func(const char*))()
  if (Optional<ParsedFunction> with_func_ptr_return = ParseFuncPtr(true))
    return with_func_ptr_return;

  // Shape 3: function with a non-function return type,
  // e.g. int main(int, char*[])
  return ParseFunctionImpl(true);
}
// Parses the token stream from the start as a (possibly nested) name and
// returns its context and basename as slices of the original text, or None
// when no name could be parsed.
Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
  m_next_token_index = 0;
  const Optional<ParsedNameRanges> ranges = ParseFullNameImpl();
  if (!ranges.hasValue())
    return None;

  ParsedName parsed;
  parsed.context = GetTextForRange(ranges->context_range);
  parsed.basename = GetTextForRange(ranges->basename_range);
  return parsed;
}
bool CPlusPlusNameParser::HasMoreTokens() {
return m_next_token_index < m_tokens.size();
}
// Moves the current position one token forward (no bounds check).
void CPlusPlusNameParser::Advance() {
  m_next_token_index += 1;
}
// Moves the current position one token backward (no bounds check).
void CPlusPlusNameParser::TakeBack() {
  m_next_token_index -= 1;
}
// Advances past the current token if it is of the given kind.
// Returns false (without moving) at end of input or on a kind mismatch.
bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
  if (HasMoreTokens() && Peek().is(kind)) {
    Advance();
    return true;
  }
  return false;
}
// Advances past the current token if it matches any of the given kinds.
// Returns false (without moving) at end of input or when nothing matches.
template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
  const bool matches = HasMoreTokens() && Peek().isOneOf(kinds...);
  if (matches)
    Advance();
  return matches;
}
// Creates a bookmark bound to the current parsing position; unless it is
// removed, it restores that position when destroyed.
CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
  Bookmark bookmark(m_next_token_index);
  return bookmark;
}
// Returns the index into m_tokens of the next token to be examined.
size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
// Returns the token at the current position without consuming it.
// Callers must make sure there is a token left (checked by assert only).
clang::Token &CPlusPlusNameParser::Peek() {
  assert(HasMoreTokens());
  clang::Token &current = m_tokens[m_next_token_index];
  return current;
}
// Parses '[return-type] name(arguments) [qualifiers]' at the current
// position. On success the position is left after the qualifiers; on failure
// the bookmark rewinds to where parsing started and None is returned.
Optional<ParsedFunction>
CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
  Bookmark rollback = SetBookmark();

  // Consume the return type only when the caller expects one.
  if (expect_return_type && !ConsumeTypename())
    return None;

  const Optional<ParsedNameRanges> name = ParseFullNameImpl();
  if (!name)
    return None;

  // Remember token positions around the argument list and the qualifiers so
  // that their text can be sliced out afterwards.
  const size_t args_begin = GetCurrentPosition();
  if (!ConsumeArguments())
    return None;
  const size_t quals_begin = GetCurrentPosition();
  SkipFunctionQualifiers();
  const size_t quals_end = GetCurrentPosition();

  ParsedFunction function;
  function.name.basename = GetTextForRange(name->basename_range);
  function.name.context = GetTextForRange(name->context_range);
  function.arguments = GetTextForRange(Range(args_begin, quals_begin));
  function.qualifiers = GetTextForRange(Range(quals_begin, quals_end));

  rollback.Remove();
  return function;
}
// Parses a function whose return type is a function pointer, e.g.
// 'void (*get_func(const char*))()'. The returned ParsedFunction describes
// the inner function (get_func above). On failure the position is restored
// and None is returned.
Optional<ParsedFunction>
CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
  Bookmark rollback = SetBookmark();

  // Consume the return type of the returned function, if expected.
  if (expect_return_type && !ConsumeTypename())
    return None;

  // '(' followed by at least one of '*', '&', '&&'.
  if (!ConsumeToken(tok::l_paren) || !ConsumePtrsAndRefs())
    return None;

  // First alternative: the parenthesis wraps an ordinary function,
  // followed by ')' and the argument list of the returned function.
  {
    Bookmark inner_rollback = SetBookmark();
    Optional<ParsedFunction> inner_function = ParseFunctionImpl(false);
    if (inner_function && ConsumeToken(tok::r_paren) && ConsumeArguments()) {
      SkipFunctionQualifiers();
      inner_rollback.Remove();
      rollback.Remove();
      return inner_function;
    }
  }

  // Second alternative: the parenthesis wraps yet another function-pointer
  // declarator, so recurse.
  Optional<ParsedFunction> inner_func_ptr = ParseFuncPtr(false);
  if (!inner_func_ptr || !ConsumeToken(tok::r_paren) || !ConsumeArguments())
    return None;

  SkipFunctionQualifiers();
  rollback.Remove();
  return inner_func_ptr;
}
// Consumes a balanced '(' ... ')' group starting at the current token.
// Simply forwards to ConsumeBrackets and returns its result.
bool CPlusPlusNameParser::ConsumeArguments() {
return ConsumeBrackets(tok::l_paren, tok::r_paren);
}
// Consumes a template argument list '<' ... '>' starting at the current
// token. Returns true with the position right after the closing bracket on
// success; returns false and restores the starting position when the current
// token is not '<', a nested construct fails to parse, or the angle brackets
// do not balance exactly.
bool CPlusPlusNameParser::ConsumeTemplateArgs() {
  Bookmark start_position = SetBookmark();
  if (!HasMoreTokens() || Peek().getKind() != tok::less)
    return false;
  Advance();

  // Consuming template arguments is a bit trickier than consuming function
  // arguments, because '<' '>' brackets are not always trivially balanced.
  // In some rare cases tokens '<' and '>' can appear inside template arguments
  // as arithmetic or shift operators not as template brackets.
  // Examples: std::enable_if<(10u)<(64), bool>
  //           f<A<operator<(X,Y)::Subclass>>
  // Good thing that compiler makes sure that really ambiguous cases of
  // '>' usage should be enclosed within '()' brackets.
  int template_counter = 1;
  bool can_open_template = false;
  while (HasMoreTokens() && template_counter > 0) {
    tok::TokenKind kind = Peek().getKind();
    switch (kind) {
    case tok::greatergreater:
      // '>>' closes two template levels at once.
      template_counter -= 2;
      can_open_template = false;
      Advance();
      break;
    case tok::greater:
      --template_counter;
      can_open_template = false;
      Advance();
      break;
    case tok::less:
      // '<' is an attempt to open a subtemplate;
      // check if parser is at the point where it's actually possible,
      // otherwise it's just a part of an expression like 'sizeof(T)<(10)'.
      // No need to do the same for '>' because compiler actually makes sure
      // that '>' always surrounded by brackets to avoid ambiguity.
      if (can_open_template)
        ++template_counter;
      can_open_template = false;
      Advance();
      break;
    case tok::kw_operator: // C++ operator overloading.
      if (!ConsumeOperator())
        return false;
      can_open_template = true;
      break;
    case tok::raw_identifier:
      can_open_template = true;
      Advance();
      break;
    case tok::l_square:
      if (!ConsumeBrackets(tok::l_square, tok::r_square))
        return false;
      can_open_template = false;
      break;
    case tok::l_paren:
      if (!ConsumeArguments())
        return false;
      can_open_template = false;
      break;
    default:
      can_open_template = false;
      Advance();
      break;
    }
  }

  // The counter is positive when the input ended before all brackets were
  // closed, and negative when a '>>' was seen while only one template level
  // was open. Both mean the brackets are unbalanced. The text being parsed is
  // arbitrary input, so reject it instead of asserting.
  if (template_counter != 0)
    return false;

  start_position.Remove();
  return true;
}
bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
Bookmark start_position = SetBookmark();
if (!ConsumeToken(tok::l_paren)) {
return false;
}
constexpr llvm::StringLiteral g_anonymous("anonymous");
if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
Peek().getRawIdentifier() == g_anonymous) {
Advance();
} else {
return false;
}
if (!ConsumeToken(tok::kw_namespace)) {
return false;
}
if (!ConsumeToken(tok::r_paren)) {
return false;
}
start_position.Remove();
return true;
}
// Consumes a balanced group delimited by `left` and `right`, including any
// nested groups of the same kind. The current token must be `left`.
// Returns false and restores the position if the group is never closed.
bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
                                          tok::TokenKind right) {
  Bookmark rollback = SetBookmark();
  if (!HasMoreTokens() || Peek().getKind() != left)
    return false;
  Advance();

  // Nesting depth of currently open brackets; the opening one is counted.
  int depth = 1;
  for (; HasMoreTokens() && depth > 0; Advance()) {
    const tok::TokenKind current = Peek().getKind();
    if (current == right)
      --depth;
    else if (current == left)
      ++depth;
  }

  assert(depth >= 0);
  if (depth > 0)
    return false;

  rollback.Remove();
  return true;
}
// Consumes an operator name: the 'operator' keyword followed by one of
//  - 'new' / 'delete', optionally followed by '[' ... ']',
//  - a single operator token listed in clang/Basic/OperatorKinds.def,
//  - '(' ... ')' (call operator) or '[' ... ']' (subscript operator),
//  - a type name (conversion operator).
// Returns false and restores the starting position if none of these match.
bool CPlusPlusNameParser::ConsumeOperator() {
Bookmark start_position = SetBookmark();
if (!ConsumeToken(tok::kw_operator))
return false;
if (!HasMoreTokens()) {
return false;
}
const auto &token = Peek();
switch (token.getKind()) {
case tok::kw_new:
case tok::kw_delete:
// This is 'new' or 'delete' operators.
Advance();
// Check for array new/delete.
if (HasMoreTokens() && Peek().is(tok::l_square)) {
// Consume the '[' and ']'.
if (!ConsumeBrackets(tok::l_square, tok::r_square))
return false;
}
break;
// Generate one 'case' per single-token overloadable operator from the
// OperatorKinds.def table. Multi-token entries (OVERLOADED_OPERATOR_MULTI)
// expand to nothing here; new/delete, '()' and '[]' have explicit cases
// in this switch.
#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
case tok::Token: \
Advance(); \
break;
#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
#include "clang/Basic/OperatorKinds.def"
#undef OVERLOADED_OPERATOR
#undef OVERLOADED_OPERATOR_MULTI
case tok::l_paren:
// Call operator consume '(' ... ')'.
if (ConsumeBrackets(tok::l_paren, tok::r_paren))
break;
return false;
case tok::l_square:
// This is a [] operator.
// Consume the '[' and ']'.
if (ConsumeBrackets(tok::l_square, tok::r_square))
break;
return false;
default:
// This might be a cast operator.
if (ConsumeTypename())
break;
return false;
}
// Success: keep the advanced position.
start_position.Remove();
return true;
}
// Skips any run of 'const' / 'volatile' tokens at the current position.
void CPlusPlusNameParser::SkipTypeQualifiers() {
  bool consumed_one = true;
  while (consumed_one)
    consumed_one = ConsumeToken(tok::kw_const, tok::kw_volatile);
}
// Skips any run of 'const', 'volatile', '&' and '&&' tokens, i.e. the
// qualifiers that may trail a function's argument list.
void CPlusPlusNameParser::SkipFunctionQualifiers() {
  bool consumed_one = true;
  while (consumed_one)
    consumed_one =
        ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp);
}
// Consumes a run of built-in type keywords and reports whether at least one
// was consumed. A built-in type can span several keywords, as in
// 'unsigned long long int'; no check is made that the combination is
// meaningful, so 'unsigned char void' is accepted as well.
bool CPlusPlusNameParser::ConsumeBuiltinType() {
  bool consumed_any = false;
  while (HasMoreTokens()) {
    switch (Peek().getKind()) {
    case tok::kw_short:
    case tok::kw_long:
    case tok::kw___int64:
    case tok::kw___int128:
    case tok::kw_signed:
    case tok::kw_unsigned:
    case tok::kw_void:
    case tok::kw_char:
    case tok::kw_int:
    case tok::kw_half:
    case tok::kw_float:
    case tok::kw_double:
    case tok::kw___float128:
    case tok::kw_wchar_t:
    case tok::kw_bool:
    case tok::kw_char16_t:
    case tok::kw_char32_t:
      break;
    default:
      // First token that is not a built-in type keyword ends the run.
      return consumed_any;
    }
    Advance();
    consumed_any = true;
  }
  return consumed_any;
}
// Same as ConsumePtrsAndRefs(), for callers that do not care whether any
// pointer or reference token was actually present.
void CPlusPlusNameParser::SkipPtrsAndRefs() {
  (void)ConsumePtrsAndRefs();
}
// Consumes sequences like 'const * const &'. Leading and interleaved
// 'const' / 'volatile' tokens are always skipped; the result says whether
// the loop below consumed at least one token.
bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
  SkipTypeQualifiers();

  bool consumed_any = false;
  for (;;) {
    if (!ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
                      tok::kw_volatile))
      break;
    consumed_any = true;
    SkipTypeQualifiers();
  }
  return consumed_any;
}
bool CPlusPlusNameParser::ConsumeTypename() {
Bookmark start_position = SetBookmark();
SkipTypeQualifiers();
if (!ConsumeBuiltinType()) {
if (!ParseFullNameImpl())
return false;
}
SkipPtrsAndRefs();
start_position.Remove();
return true;
}
// Parses a (possibly nested) C++ name starting at the current position and
// returns the token ranges of its context and basename.
//
// The name is consumed greedily by a small state machine; parsing stops at
// the first token that cannot continue the name. The result is valid only if
// the machine stopped right after an identifier, a template argument list or
// an operator name. In that case the position is left after the name;
// otherwise the starting position is restored and None is returned.
//
// The context is everything before the last '::' seen, the basename is
// everything after it. Without any '::' the context range is left empty.
Optional<CPlusPlusNameParser::ParsedNameRanges>
CPlusPlusNameParser::ParseFullNameImpl() {
// Name parsing state machine.
enum class State {
Beginning, // start of the name
AfterTwoColons, // right after ::
AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
AfterTemplate, // right after template brackets (<something>)
AfterOperator, // right after name of C++ operator
};
Bookmark start_position = SetBookmark();
State state = State::Beginning;
bool continue_parsing = true;
// Token index of the most recent '::' that separates context from basename.
Optional<size_t> last_coloncolon_position = None;
while (continue_parsing && HasMoreTokens()) {
const auto &token = Peek();
switch (token.getKind()) {
case tok::raw_identifier: // Just a name.
// An identifier may only start the name or follow '::'.
if (state != State::Beginning && state != State::AfterTwoColons) {
continue_parsing = false;
break;
}
Advance();
state = State::AfterIdentifier;
break;
case tok::l_paren: {
if (state == State::Beginning || state == State::AfterTwoColons) {
// (anonymous namespace)
if (ConsumeAnonymousNamespace()) {
state = State::AfterIdentifier;
break;
}
}
// Type declared inside a function 'func()::Type'
if (state != State::AfterIdentifier && state != State::AfterTemplate &&
state != State::AfterOperator) {
continue_parsing = false;
break;
}
// If the parenthesized group is not followed by '::' it is not part of
// the name (it is most likely the argument list of the function being
// named), so this bookmark rewinds back to the '(' on every failure path.
Bookmark l_paren_position = SetBookmark();
// Consume the '(' ... ') [const]'.
if (!ConsumeArguments()) {
continue_parsing = false;
break;
}
SkipFunctionQualifiers();
// Consume '::'
size_t coloncolon_position = GetCurrentPosition();
if (!ConsumeToken(tok::coloncolon)) {
continue_parsing = false;
break;
}
l_paren_position.Remove();
last_coloncolon_position = coloncolon_position;
state = State::AfterTwoColons;
break;
}
case tok::coloncolon: // Type nesting delimiter.
// '::' may start the name (global scope) or follow an identifier or a
// template argument list.
if (state != State::Beginning && state != State::AfterIdentifier &&
state != State::AfterTemplate) {
continue_parsing = false;
break;
}
last_coloncolon_position = GetCurrentPosition();
Advance();
state = State::AfterTwoColons;
break;
case tok::less: // Template brackets.
// Template arguments may only follow an identifier or an operator name.
if (state != State::AfterIdentifier && state != State::AfterOperator) {
continue_parsing = false;
break;
}
if (!ConsumeTemplateArgs()) {
continue_parsing = false;
break;
}
state = State::AfterTemplate;
break;
case tok::kw_operator: // C++ operator overloading.
if (state != State::Beginning && state != State::AfterTwoColons) {
continue_parsing = false;
break;
}
if (!ConsumeOperator()) {
continue_parsing = false;
break;
}
state = State::AfterOperator;
break;
case tok::tilde: // Destructor.
if (state != State::Beginning && state != State::AfterTwoColons) {
continue_parsing = false;
break;
}
Advance();
if (ConsumeToken(tok::raw_identifier)) {
state = State::AfterIdentifier;
} else {
// '~' not followed by an identifier: un-consume the '~' and stop.
TakeBack();
continue_parsing = false;
}
break;
default:
continue_parsing = false;
break;
}
}
// Only these three states correspond to a complete name; stopping at the
// beginning or right after '::' means no valid name was parsed.
if (state == State::AfterIdentifier || state == State::AfterOperator ||
state == State::AfterTemplate) {
ParsedNameRanges result;
if (last_coloncolon_position) {
result.context_range = Range(start_position.GetSavedPosition(),
last_coloncolon_position.getValue());
// '::' is a single token, so the basename starts one token after it.
result.basename_range =
Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
} else {
result.basename_range =
Range(start_position.GetSavedPosition(), GetCurrentPosition());
}
start_position.Remove();
return result;
} else {
return None;
}
}
// Returns the slice of m_text that spans from the start of the first token
// in `range` to the end of its last token (range.end_index is exclusive).
// An empty range yields an empty StringRef.
llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
  if (range.empty())
    return llvm::StringRef();
  assert(range.begin_index < range.end_index);
  assert(range.begin_index < m_tokens.size());
  assert(range.end_index <= m_tokens.size());

  clang::Token &front_token = m_tokens[range.begin_index];
  clang::Token &back_token = m_tokens[range.end_index - 1];

  // The raw encoding of each token's location is used directly as a
  // character offset into m_text.
  const unsigned text_begin = front_token.getLocation().getRawEncoding();
  const unsigned text_end =
      back_token.getLocation().getRawEncoding() + back_token.getLength();
  return m_text.take_front(text_end).drop_front(text_begin);
}
// Returns the language options handed to the lexer. The options object is
// shared by all callers and is configured exactly once, enabling line
// comments plus the C99/C11 and C++/C++11/C++14/C++1z dialect flags.
static const clang::LangOptions &GetLangOptions() {
  static llvm::once_flag g_once_flag;
  static clang::LangOptions g_options;

  auto enable_dialects = []() {
    g_options.CPlusPlus = true;
    g_options.CPlusPlus11 = true;
    g_options.CPlusPlus14 = true;
    g_options.CPlusPlus1z = true;
    g_options.C99 = true;
    g_options.C11 = true;
    g_options.LineComment = true;
  };
  llvm::call_once(g_once_flag, enable_dialects);
  return g_options;
}
// Returns a map from keyword spelling to its token kind (tok::kw_<name>).
// The map is a function-local static filled from the KEYWORD entries of
// clang/Basic/TokenKinds.def; the Flags argument of each entry is ignored.
static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
static llvm::StringMap<tok::TokenKind> g_map{
// Each KEYWORD(Name, Flags) entry expands to one {"Name", tok::kw_Name} pair.
#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
};
return g_map;
}
void CPlusPlusNameParser::ExtractTokens() {
clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
m_text.data(), m_text.data() + m_text.size());
const auto &kw_map = GetKeywordsMap();
clang::Token token;
for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
lexer.LexFromRawLexer(token)) {
if (token.is(clang::tok::raw_identifier)) {
auto it = kw_map.find(token.getRawIdentifier());
if (it != kw_map.end()) {
token.setKind(it->getValue());
}
}
m_tokens.push_back(token);
}
}

View File

@ -0,0 +1,176 @@
//===-- CPlusPlusNameParser.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef liblldb_CPlusPlusNameParser_h_
#define liblldb_CPlusPlusNameParser_h_
// C Includes
// C++ Includes
// Other libraries and framework includes
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
// Project includes
#include "lldb/Utility/ConstString.h"
#include "lldb/lldb-private.h"
namespace lldb_private {
// Helps to validate and obtain various parts of C++ definitions.
// Tokenizes a piece of text on construction and then parses it, on request,
// either as a function definition or as a (possibly nested) name.
// All StringRefs in the results are slices of the text given to the
// constructor, so that text must outlive them.
class CPlusPlusNameParser {
public:
CPlusPlusNameParser(llvm::StringRef text) : m_text(text) { ExtractTokens(); }
// A name split at its last '::' separator.
struct ParsedName {
llvm::StringRef basename;
llvm::StringRef context;
};
// A function definition split into name, argument list and trailing
// qualifiers.
struct ParsedFunction {
ParsedName name;
llvm::StringRef arguments;
llvm::StringRef qualifiers;
};
// Treats given text as a function definition and parses it.
// Function definition might or might not have a return type and this should
// change parsing result.
// Examples:
// main(int, char const*)
// T fun(int, bool)
// std::vector<int>::push_back(int)
// int& map<int, pair<short, int>>::operator[](short) const
// int (*get_function(const char *))()
llvm::Optional<ParsedFunction> ParseAsFunctionDefinition();
// Treats given text as a potentially nested name of C++ entity (function,
// class, field) and parses it.
// Examples:
// main
// fun
// std::vector<int>::push_back
// map<int, pair<short, int>>::operator[]
// func<C>(int, C&)::nested_class::method
llvm::Optional<ParsedName> ParseAsFullName();
private:
// A C++ definition to parse.
llvm::StringRef m_text;
// Tokens extracted from m_text.
llvm::SmallVector<clang::Token, 30> m_tokens;
// Index of the next token to look at from m_tokens.
size_t m_next_token_index = 0;
// Range of token indices into m_tokens: begin_index is the first token,
// end_index is one past the last token.
struct Range {
size_t begin_index = 0;
size_t end_index = 0;
Range() {}
Range(size_t begin, size_t end) : begin_index(begin), end_index(end) {
assert(end >= begin);
}
size_t size() const { return end_index - begin_index; }
bool empty() const { return size() == 0; }
};
// Token ranges of the two parts of a parsed name.
struct ParsedNameRanges {
Range basename_range;
Range context_range;
};
// Bookmark automatically restores parsing position (m_next_token_index)
// when destructed unless it's manually removed with Remove().
class Bookmark {
public:
// Binds to `position` by reference and remembers its current value.
Bookmark(size_t &position)
: m_position(position), m_position_value(position) {}
Bookmark(const Bookmark &) = delete;
// Moving transfers the pending restore: the moved-from bookmark is
// disarmed so that the position is restored at most once.
Bookmark(Bookmark &&b)
: m_position(b.m_position), m_position_value(b.m_position_value),
m_restore(b.m_restore) {
b.Remove();
}
Bookmark &operator=(Bookmark &&) = delete;
Bookmark &operator=(const Bookmark &) = delete;
// Disarms the bookmark: the destructor will leave the position alone.
void Remove() { m_restore = false; }
// Position that was current when the bookmark was created.
size_t GetSavedPosition() { return m_position_value; }
~Bookmark() {
if (m_restore) {
m_position = m_position_value;
}
}
private:
size_t &m_position;
size_t m_position_value;
bool m_restore = true;
};
// Low-level token cursor helpers.
bool HasMoreTokens();
void Advance();
void TakeBack();
// Consumes the current token if it is of the given kind.
bool ConsumeToken(clang::tok::TokenKind kind);
// Consumes the current token if it is of any of the given kinds.
template <typename... Ts> bool ConsumeToken(Ts... kinds);
Bookmark SetBookmark();
size_t GetCurrentPosition();
clang::Token &Peek();
// Consumes a balanced group of tokens enclosed within 'left' ... 'right'.
bool ConsumeBrackets(clang::tok::TokenKind left, clang::tok::TokenKind right);
// Parses '[return type] name(arguments) [qualifiers]'.
llvm::Optional<ParsedFunction> ParseFunctionImpl(bool expect_return_type);
// Parses functions returning function pointers 'string (*f(int x))(float y)'
llvm::Optional<ParsedFunction> ParseFuncPtr(bool expect_return_type);
// Consumes function arguments enclosed within '(' ... ')'
bool ConsumeArguments();
// Consumes template arguments enclosed within '<' ... '>'
bool ConsumeTemplateArgs();
// Consumes '(anonymous namespace)'
bool ConsumeAnonymousNamespace();
// Consumes operator declaration like 'operator *' or 'operator delete []'
bool ConsumeOperator();
// Skips 'const' and 'volatile'
void SkipTypeQualifiers();
// Skips 'const', 'volatile', '&', '&&' in the end of the function.
void SkipFunctionQualifiers();
// Consumes built-in types like 'int' or 'unsigned long long int'
bool ConsumeBuiltinType();
// Skips things like 'const * const &' (same as ConsumePtrsAndRefs, but the
// result is ignored).
void SkipPtrsAndRefs();
// Consumes things like 'const * const &'
bool ConsumePtrsAndRefs();
// Consumes full type name like 'Namespace::Class<int>::Method()::InnerClass'
bool ConsumeTypename();
// Parses a potentially nested name and returns the token ranges of its
// context and basename.
llvm::Optional<ParsedNameRanges> ParseFullNameImpl();
// Returns the part of m_text covered by the given token range.
llvm::StringRef GetTextForRange(const Range &range);
// Populate m_tokens by calling clang lexer on m_text.
void ExtractTokens();
};
} // namespace lldb_private
#endif // liblldb_CPlusPlusNameParser_h_

View File

@ -6,35 +6,139 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "gtest/gtest.h"
#include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h"
using namespace lldb_private;
// Verifies that CPlusPlusLanguage::MethodName splits a C++ function
// declaration string into its context, basename, arguments and qualifiers,
// and that GetScopeQualifiedName() yields the expected scope-qualified name.
// Each TestCase holds the input followed by the expected value of every
// component, in the order the struct declares them.
TEST(CPlusPlusLanguage, MethodNameParsing) {
  struct TestCase {
    std::string input;
    std::string context, basename, arguments, qualifiers, scope_qualified_name;
  };

  TestCase test_cases[] = {
      {"foo::bar(baz)", "foo", "bar", "(baz)", "", "foo::bar"},
      {"main(int, char *[]) ", "", "main", "(int, char *[])", "", "main"},
      {"foo::bar(baz) const", "foo", "bar", "(baz)", "const", "foo::bar"},
      {"foo::~bar(baz)", "foo", "~bar", "(baz)", "", "foo::~bar"},
      {"a::b::c::d(e,f)", "a::b::c", "d", "(e,f)", "", "a::b::c::d"},
      {"void f(int)", "", "f", "(int)", "", "f"},

      // Operators
      {"std::basic_ostream<char, std::char_traits<char> >& "
       "std::operator<<<std::char_traits<char> >"
       "(std::basic_ostream<char, std::char_traits<char> >&, char const*)",
       "std", "operator<<<std::char_traits<char> >",
       "(std::basic_ostream<char, std::char_traits<char> >&, char const*)", "",
       "std::operator<<<std::char_traits<char> >"},
      {"operator delete[](void*, clang::ASTContext const&, unsigned long)", "",
       "operator delete[]", "(void*, clang::ASTContext const&, unsigned long)",
       "", "operator delete[]"},
      {"llvm::Optional<clang::PostInitializer>::operator bool() const",
       "llvm::Optional<clang::PostInitializer>", "operator bool", "()", "const",
       "llvm::Optional<clang::PostInitializer>::operator bool"},
      {"(anonymous namespace)::FactManager::operator[](unsigned short)",
       "(anonymous namespace)::FactManager", "operator[]", "(unsigned short)",
       "", "(anonymous namespace)::FactManager::operator[]"},
      {"const int& std::map<int, pair<short, int>>::operator[](short) const",
       "std::map<int, pair<short, int>>", "operator[]", "(short)", "const",
       "std::map<int, pair<short, int>>::operator[]"},
      {"CompareInsn::operator()(llvm::StringRef, InsnMatchEntry const&)",
       "CompareInsn", "operator()", "(llvm::StringRef, InsnMatchEntry const&)",
       "", "CompareInsn::operator()"},
      {"llvm::Optional<llvm::MCFixupKind>::operator*() const &",
       "llvm::Optional<llvm::MCFixupKind>", "operator*", "()", "const &",
       "llvm::Optional<llvm::MCFixupKind>::operator*"},

      // Internal classes
      {"operator<<(Cls, Cls)::Subclass::function()",
       "operator<<(Cls, Cls)::Subclass", "function", "()", "",
       "operator<<(Cls, Cls)::Subclass::function"},
      {"SAEC::checkFunction(context&) const::CallBack::CallBack(int)",
       "SAEC::checkFunction(context&) const::CallBack", "CallBack", "(int)", "",
       "SAEC::checkFunction(context&) const::CallBack::CallBack"},

      // Anonymous namespace
      {"XX::(anonymous namespace)::anon_class::anon_func() const",
       "XX::(anonymous namespace)::anon_class", "anon_func", "()", "const",
       "XX::(anonymous namespace)::anon_class::anon_func"},

      // Function pointers
      {"string (*f(vector<int>&&))(float)", "", "f", "(vector<int>&&)", "",
       "f"},
      {"void (*&std::_Any_data::_M_access<void (*)()>())()", "std::_Any_data",
       "_M_access<void (*)()>", "()", "",
       "std::_Any_data::_M_access<void (*)()>"},
      {"void (*(*(*(*(*(*(*(* const&func1(int))())())())())())())())()", "",
       "func1", "(int)", "", "func1"},

      // Templates
      {"void llvm::PM<llvm::Module, llvm::AM<llvm::Module>>::"
       "addPass<llvm::VP>(llvm::VP)",
       "llvm::PM<llvm::Module, llvm::AM<llvm::Module>>", "addPass<llvm::VP>",
       "(llvm::VP)", "",
       "llvm::PM<llvm::Module, llvm::AM<llvm::Module>>::"
       "addPass<llvm::VP>"},
      {"void std::vector<Class, std::allocator<Class> >"
       "::_M_emplace_back_aux<Class const&>(Class const&)",
       "std::vector<Class, std::allocator<Class> >",
       "_M_emplace_back_aux<Class const&>", "(Class const&)", "",
       "std::vector<Class, std::allocator<Class> >::"
       "_M_emplace_back_aux<Class const&>"},
      {"unsigned long llvm::countTrailingOnes<unsigned int>"
       "(unsigned int, llvm::ZeroBehavior)",
       "llvm", "countTrailingOnes<unsigned int>",
       "(unsigned int, llvm::ZeroBehavior)", "",
       "llvm::countTrailingOnes<unsigned int>"},
      {"std::enable_if<(10u)<(64), bool>::type llvm::isUInt<10u>(unsigned "
       "long)",
       "llvm", "isUInt<10u>", "(unsigned long)", "", "llvm::isUInt<10u>"},
      {"f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>()", "",
       "f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>", "()", "",
       "f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>"}};

  for (const auto &test : test_cases) {
    CPlusPlusLanguage::MethodName method(ConstString(test.input));
    // Stream the input so a failure identifies the offending declaration.
    EXPECT_TRUE(method.IsValid()) << test.input;
    // Only inspect the components when parsing succeeded; otherwise the
    // IsValid() failure above is the single, meaningful diagnostic.
    if (method.IsValid()) {
      EXPECT_EQ(test.context, method.GetContext().str());
      EXPECT_EQ(test.basename, method.GetBasename().str());
      EXPECT_EQ(test.arguments, method.GetArguments().str());
      EXPECT_EQ(test.qualifiers, method.GetQualifiers().str());
      EXPECT_EQ(test.scope_qualified_name, method.GetScopeQualifiedName());
    }
  }
}
// Verifies CPlusPlusLanguage::ExtractContextAndIdentifier(): each name in
// test_cases must be accepted and split into the expected context and
// basename, while "void", "321" and the empty string must be rejected.
TEST(CPlusPlusLanguage, ExtractContextAndIdentifier) {
  struct TestCase {
    std::string input;
    std::string context, basename;
  };

  TestCase test_cases[] = {
      {"main", "", "main"},
      {"foo01::bar", "foo01", "bar"},
      {"foo::~bar", "foo", "~bar"},
      {"std::vector<int>::push_back", "std::vector<int>", "push_back"},
      {"operator<<(Cls, Cls)::Subclass::function",
       "operator<<(Cls, Cls)::Subclass", "function"},
      {"std::vector<Class, std::allocator<Class>>"
       "::_M_emplace_back_aux<Class const&>",
       "std::vector<Class, std::allocator<Class>>",
       "_M_emplace_back_aux<Class const&>"}};

  for (const auto &test : test_cases) {
    // Fresh out-parameters per case so a result can never be carried over
    // from a previous iteration.
    llvm::StringRef context, basename;
    const bool extracted = CPlusPlusLanguage::ExtractContextAndIdentifier(
        test.input.c_str(), context, basename);
    // Stream the input so a failure identifies the offending name, as the
    // MethodNameParsing test does.
    EXPECT_TRUE(extracted) << test.input;
    // Compare the pieces only when extraction succeeded; otherwise the
    // failure above is the single, meaningful diagnostic.
    if (extracted) {
      EXPECT_EQ(test.context, context.str()) << test.input;
      EXPECT_EQ(test.basename, basename.str()) << test.input;
    }
  }

  // Inputs that must be rejected.
  llvm::StringRef context, basename;
  EXPECT_FALSE(CPlusPlusLanguage::ExtractContextAndIdentifier("void", context,
                                                              basename));
  EXPECT_FALSE(
      CPlusPlusLanguage::ExtractContextAndIdentifier("321", context, basename));
  EXPECT_FALSE(
      CPlusPlusLanguage::ExtractContextAndIdentifier("", context, basename));
}