// Forked from OSchip/llvm-project.
//===- TokensTest.cpp -----------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "clang/Tooling/Syntax/Tokens.h"
|
|
#include "clang/AST/ASTConsumer.h"
|
|
#include "clang/AST/Expr.h"
|
|
#include "clang/Basic/Diagnostic.h"
|
|
#include "clang/Basic/DiagnosticIDs.h"
|
|
#include "clang/Basic/DiagnosticOptions.h"
|
|
#include "clang/Basic/FileManager.h"
|
|
#include "clang/Basic/FileSystemOptions.h"
|
|
#include "clang/Basic/LLVM.h"
|
|
#include "clang/Basic/LangOptions.h"
|
|
#include "clang/Basic/SourceLocation.h"
|
|
#include "clang/Basic/SourceManager.h"
|
|
#include "clang/Basic/TokenKinds.def"
|
|
#include "clang/Basic/TokenKinds.h"
|
|
#include "clang/Frontend/CompilerInstance.h"
|
|
#include "clang/Frontend/FrontendAction.h"
|
|
#include "clang/Frontend/Utils.h"
|
|
#include "clang/Lex/Lexer.h"
|
|
#include "clang/Lex/PreprocessorOptions.h"
|
|
#include "clang/Lex/Token.h"
|
|
#include "clang/Tooling/Tooling.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/ADT/IntrusiveRefCntPtr.h"
|
|
#include "llvm/ADT/None.h"
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include "llvm/Support/VirtualFileSystem.h"
|
|
#include "llvm/Support/raw_os_ostream.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/Testing/Support/Annotations.h"
|
|
#include "llvm/Testing/Support/SupportHelpers.h"
|
|
#include <cassert>
|
|
#include <cstdlib>
|
|
#include <gmock/gmock.h>
|
|
#include <gtest/gtest.h>
|
|
#include <memory>
|
|
#include <ostream>
|
|
#include <string>
|
|
|
|
using namespace clang;
|
|
using namespace clang::syntax;
|
|
|
|
using llvm::ValueIs;
|
|
using ::testing::AllOf;
|
|
using ::testing::Contains;
|
|
using ::testing::ElementsAre;
|
|
using ::testing::Field;
|
|
using ::testing::Matcher;
|
|
using ::testing::Not;
|
|
using ::testing::StartsWith;
|
|
|
|
namespace {
|
|
// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
|
|
// argument.
|
|
MATCHER_P(SameRange, A, "") {
|
|
return A.begin() == arg.begin() && A.end() == arg.end();
|
|
}
|
|
|
|
/// Matches a TokenBuffer::Expansion whose Spelled and Expanded fields satisfy
/// the corresponding matchers.
Matcher<TokenBuffer::Expansion>
IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
            Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
  auto SpelledMatches = Field(&TokenBuffer::Expansion::Spelled, Spelled);
  auto ExpandedMatches = Field(&TokenBuffer::Expansion::Expanded, Expanded);
  return AllOf(SpelledMatches, ExpandedMatches);
}
|
|
// Matchers for syntax::Token.
|
|
MATCHER_P(Kind, K, "") { return arg.kind() == K; }
|
|
// Matches a token whose text, looked up through *SourceMgr, equals Text.
MATCHER_P2(HasText, Text, SourceMgr, "") {
  const auto Spelling = arg.text(*SourceMgr);
  return Spelling == Text;
}
|
|
/// Checks the start and end location of a token are equal to SourceRng.
|
|
MATCHER_P(RangeIs, SourceRng, "") {
|
|
return arg.location() == SourceRng.first &&
|
|
arg.endLocation() == SourceRng.second;
|
|
}
|
|
|
|
class TokenCollectorTest : public ::testing::Test {
|
|
public:
|
|
/// Run the clang frontend, collect the preprocessed tokens from the frontend
|
|
/// invocation and store them in this->Buffer.
|
|
/// This also clears SourceManager before running the compiler.
|
|
void recordTokens(llvm::StringRef Code) {
|
|
class RecordTokens : public ASTFrontendAction {
|
|
public:
|
|
explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
|
|
|
|
bool BeginSourceFileAction(CompilerInstance &CI) override {
|
|
assert(!Collector && "expected only a single call to BeginSourceFile");
|
|
Collector.emplace(CI.getPreprocessor());
|
|
return true;
|
|
}
|
|
void EndSourceFileAction() override {
|
|
assert(Collector && "BeginSourceFileAction was never called");
|
|
Result = std::move(*Collector).consume();
|
|
}
|
|
|
|
std::unique_ptr<ASTConsumer>
|
|
CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
|
|
return std::make_unique<ASTConsumer>();
|
|
}
|
|
|
|
private:
|
|
TokenBuffer &Result;
|
|
llvm::Optional<TokenCollector> Collector;
|
|
};
|
|
|
|
constexpr const char *FileName = "./input.cpp";
|
|
FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
|
|
// Prepare to run a compiler.
|
|
if (!Diags->getClient())
|
|
Diags->setClient(new IgnoringDiagConsumer);
|
|
std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
|
|
FileName};
|
|
auto CI = createInvocationFromCommandLine(Args, Diags, FS);
|
|
assert(CI);
|
|
CI->getFrontendOpts().DisableFree = false;
|
|
CI->getPreprocessorOpts().addRemappedFile(
|
|
FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
|
|
CompilerInstance Compiler;
|
|
Compiler.setInvocation(std::move(CI));
|
|
Compiler.setDiagnostics(Diags.get());
|
|
Compiler.setFileManager(FileMgr.get());
|
|
Compiler.setSourceManager(SourceMgr.get());
|
|
|
|
this->Buffer = TokenBuffer(*SourceMgr);
|
|
RecordTokens Recorder(this->Buffer);
|
|
ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
|
|
<< "failed to run the frontend";
|
|
}
|
|
|
|
/// Record the tokens and return a test dump of the resulting buffer.
|
|
std::string collectAndDump(llvm::StringRef Code) {
|
|
recordTokens(Code);
|
|
return Buffer.dumpForTests();
|
|
}
|
|
|
|
// Adds a file to the test VFS.
|
|
void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
|
|
if (!FS->addFile(Path, time_t(),
|
|
llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
|
|
ADD_FAILURE() << "could not add a file to VFS: " << Path;
|
|
}
|
|
}
|
|
|
|
/// Add a new file, run syntax::tokenize() on it and return the results.
|
|
std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
|
|
// FIXME: pass proper LangOptions.
|
|
return syntax::tokenize(
|
|
SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
|
|
*SourceMgr, LangOptions());
|
|
}
|
|
|
|
// Specialized versions of matchers that hide the SourceManager from clients.
|
|
Matcher<syntax::Token> HasText(std::string Text) const {
|
|
return ::HasText(Text, SourceMgr.get());
|
|
}
|
|
Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
|
|
std::pair<SourceLocation, SourceLocation> Ls;
|
|
Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
|
|
.getLocWithOffset(R.Begin);
|
|
Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
|
|
.getLocWithOffset(R.End);
|
|
return ::RangeIs(Ls);
|
|
}
|
|
|
|
/// Finds a subrange in O(n * m).
|
|
template <class T, class U, class Eq>
|
|
llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
|
|
llvm::ArrayRef<T> Range, Eq F) {
|
|
for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
|
|
auto It = Begin;
|
|
for (auto ItSub = Subrange.begin();
|
|
ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
|
|
if (!F(*ItSub, *It))
|
|
goto continue_outer;
|
|
}
|
|
return llvm::makeArrayRef(Begin, It);
|
|
continue_outer:;
|
|
}
|
|
return llvm::makeArrayRef(Range.end(), Range.end());
|
|
}
|
|
|
|
/// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
|
|
/// The match should be unique. \p Query is a whitespace-separated list of
|
|
/// tokens to search for.
|
|
llvm::ArrayRef<syntax::Token>
|
|
findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
|
|
llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
|
|
Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
|
|
if (QueryTokens.empty()) {
|
|
ADD_FAILURE() << "will not look for an empty list of tokens";
|
|
std::abort();
|
|
}
|
|
// An equality test for search.
|
|
auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
|
|
return Q == T.text(*SourceMgr);
|
|
};
|
|
// Find a match.
|
|
auto Found =
|
|
findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
|
|
if (Found.begin() == Tokens.end()) {
|
|
ADD_FAILURE() << "could not find the subrange for " << Query;
|
|
std::abort();
|
|
}
|
|
// Check that the match is unique.
|
|
if (findSubrange(llvm::makeArrayRef(QueryTokens),
|
|
llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
|
|
.begin() != Tokens.end()) {
|
|
ADD_FAILURE() << "match is not unique for " << Query;
|
|
std::abort();
|
|
}
|
|
return Found;
|
|
};
|
|
|
|
// Specialized versions of findTokenRange for expanded and spelled tokens.
|
|
llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
|
|
return findTokenRange(Query, Buffer.expandedTokens());
|
|
}
|
|
llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
|
|
FileID File = FileID()) {
|
|
if (!File.isValid())
|
|
File = SourceMgr->getMainFileID();
|
|
return findTokenRange(Query, Buffer.spelledTokens(File));
|
|
}
|
|
|
|
// Data fields.
|
|
llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
|
|
new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
|
|
IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
|
|
new llvm::vfs::InMemoryFileSystem;
|
|
llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
|
|
new FileManager(FileSystemOptions(), FS);
|
|
llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
|
|
new SourceManager(*Diags, *FileMgr);
|
|
/// Contains last result of calling recordTokens().
|
|
TokenBuffer Buffer = TokenBuffer(*SourceMgr);
|
|
};
|
|
|
|
// Checks syntax::tokenize() on buffers that are lexed without running the
// frontend.
TEST_F(TokenCollectorTest, RawMode) {
  const auto MainTokens = tokenize("int main() {}");
  EXPECT_THAT(MainTokens,
              ElementsAre(Kind(tok::kw_int),
                          AllOf(HasText("main"), Kind(tok::identifier)),
                          Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace), Kind(tok::r_brace)));

  // Comments are ignored for now.
  const auto CommentedTokens = tokenize("/* foo */int a; // more comments");
  EXPECT_THAT(CommentedTokens,
              ElementsAre(Kind(tok::kw_int),
                          AllOf(HasText("a"), Kind(tok::identifier)),
                          Kind(tok::semi)));
}
|
|
|
|
// Runs the frontend on small inputs and compares the complete test dump of
// the collected tokens against the expected text.
TEST_F(TokenCollectorTest, Basic) {
  // NOTE(review): the expected dumps below are indented the way
  // TokenBuffer::dumpForTests() is understood to print them (section headers
  // by two spaces, token lists and mapping rows by four) -- confirm against
  // the implementation in Tokens.cpp.
  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
      {"int main() {}",
       R"(expanded tokens:
  int main ( ) { }
file './input.cpp'
  spelled tokens:
    int main ( ) { }
  no mappings.
)"},
      // All kinds of whitespace are ignored.
      {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
       R"(expanded tokens:
  int main ( ) { }
file './input.cpp'
  spelled tokens:
    int main ( ) { }
  no mappings.
)"},
      // Annotation tokens are ignored.
      {R"cpp(
#pragma GCC visibility push (public)
#pragma GCC visibility pop
)cpp",
       R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    # pragma GCC visibility push ( public ) # pragma GCC visibility pop
  mappings:
    ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
)"},
      // Empty files should not crash.
      {R"cpp()cpp", R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    <empty>
  no mappings.
)"},
      // Should not crash on errors inside '#define' directives. Error is that
      // stringification (#B) does not refer to a macro parameter.
      {
          R"cpp(
a
#define MACRO() A #B
)cpp",
          R"(expanded tokens:
  a
file './input.cpp'
  spelled tokens:
    a # define MACRO ( ) A # B
  mappings:
    ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
)"}};
  // Expected value first, matching the other dump-based tests in this file.
  for (auto &Test : TestCases)
    EXPECT_EQ(Test.second, collectAndDump(Test.first))
        << collectAndDump(Test.first);
}
|
|
|
|
// Verifies that both expanded and spelled tokens carry the source ranges of
// the annotated pieces of the input.
TEST_F(TokenCollectorTest, Locations) {
  // Check locations of the tokens.
  llvm::Annotations Code(R"cpp(
$r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
)cpp");
  recordTokens(Code.code());

  // Matcher for a token of kind K that covers the annotated range Name.
  auto TokenAt = [&](tok::TokenKind K, llvm::StringRef Name) {
    return AllOf(Kind(K), RangeIs(Code.range(Name)));
  };

  // Check expanded tokens.
  EXPECT_THAT(Buffer.expandedTokens(),
              ElementsAre(TokenAt(tok::kw_int, "r1"),
                          TokenAt(tok::identifier, "r2"),
                          TokenAt(tok::equal, "r3"),
                          TokenAt(tok::string_literal, "r4"),
                          TokenAt(tok::semi, "r5"), Kind(tok::eof)));
  // Check spelled tokens.
  const FileID MainFile = SourceMgr->getMainFileID();
  EXPECT_THAT(Buffer.spelledTokens(MainFile),
              ElementsAre(TokenAt(tok::kw_int, "r1"),
                          TokenAt(tok::identifier, "r2"),
                          TokenAt(tok::equal, "r3"),
                          TokenAt(tok::string_literal, "r4"),
                          TokenAt(tok::semi, "r5")));
}
|
|
|
|
// Preprocessor directives appear among the spelled tokens but contribute no
// expanded tokens.
TEST_F(TokenCollectorTest, MacroDirectives) {
  // Macro directives are not stored anywhere at the moment.
  std::string Code = R"cpp(
#define FOO a
#include "unresolved_file.h"
#undef FOO
#ifdef X
#else
#endif
#ifndef Y
#endif
#if 1
#elif 2
#else
#endif
#pragma once
#pragma something lalala

int a;
)cpp";
  // NOTE(review): indentation follows the layout TokenBuffer::dumpForTests()
  // is understood to print (two spaces for section headers, four for token
  // lists and mapping rows) -- confirm against Tokens.cpp.
  std::string Expected =
      "expanded tokens:\n"
      "  int a ;\n"
      "file './input.cpp'\n"
      "  spelled tokens:\n"
      "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
      "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
      "# endif # pragma once # pragma something lalala int a ;\n"
      "  mappings:\n"
      "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
  EXPECT_EQ(collectAndDump(Code), Expected);
}
|
|
|
|
// Checks the spelled-to-expanded mappings recorded for object-like,
// function-like, nested and empty macro expansions.
TEST_F(TokenCollectorTest, MacroReplacements) {
  // NOTE(review): expected dumps are indented the way
  // TokenBuffer::dumpForTests() is understood to print them (two spaces for
  // section headers, four for token lists and mapping rows); the raw and the
  // concatenated literals below use the same layout -- confirm against
  // Tokens.cpp.
  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
      // A simple object-like macro.
      {R"cpp(
#define INT int const
INT a;
)cpp",
       R"(expanded tokens:
  int const a ;
file './input.cpp'
  spelled tokens:
    # define INT int const INT a ;
  mappings:
    ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
    ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
)"},
      // A simple function-like macro.
      {R"cpp(
#define INT(a) const int
INT(10+10) a;
)cpp",
       R"(expanded tokens:
  const int a ;
file './input.cpp'
  spelled tokens:
    # define INT ( a ) const int INT ( 10 + 10 ) a ;
  mappings:
    ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
    ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
)"},
      // Recursive macro replacements.
      {R"cpp(
#define ID(X) X
#define INT int const
ID(ID(INT)) a;
)cpp",
       R"(expanded tokens:
  int const a ;
file './input.cpp'
  spelled tokens:
    # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
  mappings:
    ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
    ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
)"},
      // A little more complicated recursive macro replacements.
      {R"cpp(
#define ADD(X, Y) X+Y
#define MULT(X, Y) X*Y

int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
)cpp",
       "expanded tokens:\n"
       "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
       "file './input.cpp'\n"
       "  spelled tokens:\n"
       "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
       "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
       "  mappings:\n"
       "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
       "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
      // Empty macro replacement.
      // FIXME: the #define directives should not be glued together.
      {R"cpp(
#define EMPTY
#define EMPTY_FUNC(X)
EMPTY
EMPTY_FUNC(1+2+3)
)cpp",
       R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
  mappings:
    ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
    ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
    ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
)"},
      // File ends with a macro replacement.
      {R"cpp(
#define FOO 10+10;
int a = FOO
)cpp",
       R"(expanded tokens:
  int a = 10 + 10 ;
file './input.cpp'
  spelled tokens:
    # define FOO 10 + 10 ; int a = FOO
  mappings:
    ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
    ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
)"}};

  for (auto &Test : TestCases)
    EXPECT_EQ(Test.second, collectAndDump(Test.first))
        << collectAndDump(Test.first);
}
|
|
|
|
// Covers tokens whose text does not map one-to-one onto a contiguous piece of
// the source: token pasting results and tokens split by line continuations.
TEST_F(TokenCollectorTest, SpecialTokens) {
  // Tokens coming from concatenations.
  recordTokens(R"cpp(
#define CONCAT(a, b) a ## b
int a = CONCAT(1, 2);
)cpp");
  const std::vector<syntax::Token> Concatenated(Buffer.expandedTokens());
  EXPECT_THAT(Concatenated, Contains(HasText("12")));

  // Multi-line tokens with slashes at the end.
  recordTokens("i\\\nn\\\nt");
  const auto Spliced = Buffer.expandedTokens();
  EXPECT_THAT(Spliced,
              ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
                          Kind(tok::eof)));
  // FIXME: test tokens with digraphs and UCN identifiers.
}
|
|
|
|
// Checks how '>>' is recorded both when it closes two template argument lists
// and when it is a shift operator.
TEST_F(TokenCollectorTest, LateBoundTokens) {
  // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
  // but we choose to record them as a single token (for now).
  llvm::Annotations Code(R"cpp(
template <class T>
struct foo { int a; };
int bar = foo<foo<int$br[[>>]]().a;
int baz = 10 $op[[>>]] 2;
)cpp");
  recordTokens(Code.code());

  const std::vector<syntax::Token> Expanded(Buffer.expandedTokens());
  const auto ClosingBrackets =
      AllOf(Kind(tok::greatergreater), RangeIs(Code.range("br")));
  const auto ShiftOperator =
      AllOf(Kind(tok::greatergreater), RangeIs(Code.range("op")));
  EXPECT_THAT(Expanded,
              AllOf(Contains(ClosingBrackets), Contains(ShiftOperator)));
}
|
|
|
|
// Tokens of inline method bodies and member initializers must show up exactly
// once among the expanded tokens.
TEST_F(TokenCollectorTest, DelayedParsing) {
  llvm::StringLiteral Code = R"cpp(
struct Foo {
int method() {
// Parser will visit method bodies and initializers multiple times, but
// TokenBuffer should only record the first walk over the tokens;
return 100;
}
int a = 10;

struct Subclass {
void foo() {
Foo().method();
}
};
};
)cpp";
  // Only the "expanded tokens" section at the start of the dump is checked.
  // NOTE(review): the token list is indented by two spaces, which is how
  // TokenBuffer::dumpForTests() is understood to print it -- confirm against
  // Tokens.cpp.
  std::string ExpectedTokens =
      "expanded tokens:\n"
      "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
      "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
  EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
}
|
|
|
|
// Checks the dump when tokens come from the main file and from two headers,
// one of which is included by the other.
TEST_F(TokenCollectorTest, MultiFile) {
  addFile("./foo.h", R"cpp(
#define ADD(X, Y) X+Y
int a = 100;
#include "bar.h"
)cpp");
  addFile("./bar.h", R"cpp(
int b = ADD(1, 2);
#define MULT(X, Y) X*Y
)cpp");
  llvm::StringLiteral Code = R"cpp(
#include "foo.h"
int c = ADD(1, MULT(2,3));
)cpp";

  // NOTE(review): indentation follows the layout TokenBuffer::dumpForTests()
  // is understood to print (two spaces for section headers, four for token
  // lists and mapping rows) -- confirm against Tokens.cpp.
  std::string Expected = R"(expanded tokens:
  int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
file './input.cpp'
  spelled tokens:
    # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
  mappings:
    ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
    ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
file './foo.h'
  spelled tokens:
    # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
  mappings:
    ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
    ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
file './bar.h'
  spelled tokens:
    int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
  mappings:
    ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
    ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
)";

  EXPECT_EQ(Expected, collectAndDump(Code))
      << "input: " << Code << "\nresults: " << collectAndDump(Code);
}
|
|
|
|
/// Fixture for tests of TokenBuffer queries; it adds nothing to
/// TokenCollectorTest and only gives those tests their own suite name.
class TokenBufferTest : public TokenCollectorTest {
};
|
|
|
|
// Exercises TokenBuffer::spelledForExpanded() on plain tokens, macro
// expansions, nested macro calls, empty expansions and directives.
TEST_F(TokenBufferTest, SpelledByExpanded) {
  // Shorthand: look up the expanded tokens matching Query and map them back
  // to spelled tokens.
  auto SpelledFor = [this](llvm::StringRef Query) {
    return Buffer.spelledForExpanded(findExpanded(Query));
  };

  recordTokens(R"cpp(
a1 a2 a3 b1 b2
)cpp");

  // Sanity check: expanded and spelled tokens are stored separately.
  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
  // Searching for subranges of expanded tokens should give the corresponding
  // spelled ones.
  EXPECT_THAT(SpelledFor("a1 a2 a3 b1 b2"),
              ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
  EXPECT_THAT(SpelledFor("a1 a2 a3"),
              ValueIs(SameRange(findSpelled("a1 a2 a3"))));
  EXPECT_THAT(SpelledFor("b1 b2"), ValueIs(SameRange(findSpelled("b1 b2"))));

  // Test search on simple macro expansions.
  recordTokens(R"cpp(
#define A a1 a2 a3
#define B b1 b2

A split B
)cpp");
  EXPECT_THAT(SpelledFor("a1 a2 a3 split b1 b2"),
              ValueIs(SameRange(findSpelled("A split B"))));
  EXPECT_THAT(SpelledFor("a1 a2 a3"),
              ValueIs(SameRange(findSpelled("A split").drop_back())));
  EXPECT_THAT(SpelledFor("b1 b2"),
              ValueIs(SameRange(findSpelled("split B").drop_front())));
  // Ranges not fully covering macro invocations should fail.
  EXPECT_EQ(SpelledFor("a1 a2"), llvm::None);
  EXPECT_EQ(SpelledFor("b2"), llvm::None);
  EXPECT_EQ(SpelledFor("a2 a3 split b1 b2"), llvm::None);

  // Recursive macro invocations.
  recordTokens(R"cpp(
#define ID(x) x
#define B b1 b2

ID(ID(ID(a1) a2 a3)) split ID(B)
)cpp");

  EXPECT_THAT(SpelledFor("a1 a2 a3"),
              ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
  EXPECT_THAT(SpelledFor("b1 b2"),
              ValueIs(SameRange(findSpelled("ID ( B )"))));
  EXPECT_THAT(SpelledFor("a1 a2 a3 split b1 b2"),
              ValueIs(SameRange(findSpelled(
                  "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
  // Ranges crossing macro call boundaries.
  EXPECT_EQ(SpelledFor("a1 a2 a3 split b1"), llvm::None);
  EXPECT_EQ(SpelledFor("a2 a3 split b1"), llvm::None);
  // FIXME: next two examples should map to macro arguments, but currently they
  //        fail.
  EXPECT_EQ(SpelledFor("a2"), llvm::None);
  EXPECT_EQ(SpelledFor("a1 a2"), llvm::None);

  // Empty macro expansions.
  recordTokens(R"cpp(
#define EMPTY
#define ID(X) X

EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
EMPTY EMPTY ID(4 5 6) split2
ID(7 8 9) EMPTY EMPTY
)cpp");
  EXPECT_THAT(SpelledFor("1 2 3"),
              ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
  EXPECT_THAT(SpelledFor("4 5 6"),
              ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
  EXPECT_THAT(SpelledFor("7 8 9"),
              ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));

  // Empty mappings coming from various directives.
  recordTokens(R"cpp(
#define ID(X) X
ID(1)
#pragma lalala
not_mapped
)cpp");
  EXPECT_THAT(SpelledFor("not_mapped"),
              ValueIs(SameRange(findSpelled("not_mapped"))));
}
|
|
|
|
// Exercises TokenBuffer::expansionStartingAt() for object-like macros,
// function-like macros and preprocessor directives.
TEST_F(TokenBufferTest, ExpansionStartingAt) {
  // Asserts that no expansion is reported for any spelled token other than
  // the first one of Spelled.
  auto ExpectOnlyFirstTokenStartsExpansion =
      [this](llvm::ArrayRef<syntax::Token> Spelled) {
        for (const auto &T : Spelled.drop_front())
          EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
      };

  // Object-like macro expansions.
  recordTokens(R"cpp(
#define FOO 3+4
int a = FOO 1;
int b = FOO 2;
)cpp");

  const auto FirstFoo = findSpelled("FOO 1").drop_back();
  const auto FirstFooExpanded = findExpanded("3 + 4 1").drop_back();
  EXPECT_THAT(
      Buffer.expansionStartingAt(FirstFoo.data()),
      ValueIs(IsExpansion(SameRange(FirstFoo), SameRange(FirstFooExpanded))));

  const auto SecondFoo = findSpelled("FOO 2").drop_back();
  const auto SecondFooExpanded = findExpanded("3 + 4 2").drop_back();
  EXPECT_THAT(
      Buffer.expansionStartingAt(SecondFoo.data()),
      ValueIs(IsExpansion(SameRange(SecondFoo), SameRange(SecondFooExpanded))));

  // Function-like macro expansions.
  recordTokens(R"cpp(
#define ID(X) X
int a = ID(1+2+3);
int b = ID(ID(2+3+4));
)cpp");

  const auto SingleID = findSpelled("ID ( 1 + 2 + 3 )");
  EXPECT_THAT(Buffer.expansionStartingAt(&SingleID.front()),
              ValueIs(IsExpansion(SameRange(SingleID),
                                  SameRange(findExpanded("1 + 2 + 3")))));
  // Only the first spelled token should be found.
  ExpectOnlyFirstTokenStartsExpansion(SingleID);

  const auto NestedID = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
  EXPECT_THAT(Buffer.expansionStartingAt(&NestedID.front()),
              ValueIs(IsExpansion(SameRange(NestedID),
                                  SameRange(findExpanded("2 + 3 + 4")))));
  // Only the first spelled token should be found.
  ExpectOnlyFirstTokenStartsExpansion(NestedID);

  // PP directives.
  recordTokens(R"cpp(
#define FOO 1
int a = FOO;
#pragma once
int b = 1;
)cpp");

  // A directive maps to an empty expanded range located at the token that
  // follows it.
  const auto Define = findSpelled("# define FOO 1");
  const auto AfterDefine = findExpanded("int a").take_front(0);
  EXPECT_THAT(Buffer.expansionStartingAt(&Define.front()),
              ValueIs(IsExpansion(SameRange(Define), SameRange(AfterDefine))));
  // Only the first spelled token should be found.
  ExpectOnlyFirstTokenStartsExpansion(Define);

  const auto Pragma = findSpelled("# pragma once");
  const auto AfterPragma = findExpanded("int b").take_front(0);
  EXPECT_THAT(Buffer.expansionStartingAt(&Pragma.front()),
              ValueIs(IsExpansion(SameRange(Pragma), SameRange(AfterPragma))));
  // Only the first spelled token should be found.
  ExpectOnlyFirstTokenStartsExpansion(Pragma);
}
|
|
|
|
// Checks conversion of a single token and of a pair of tokens into a
// FileRange of the main file.
TEST_F(TokenBufferTest, TokensToFileRange) {
  addFile("./foo.h", "token_from_header");
  llvm::Annotations Code(R"cpp(
#define FOO token_from_expansion
#include "./foo.h"
$all[[$i[[int]] a = FOO;]]
)cpp");
  recordTokens(Code.code());

  auto &SM = *SourceMgr;
  const FileID MainFile = SM.getMainFileID();

  // Two simple examples.
  const syntax::Token Int = findExpanded("int").front();
  const syntax::Token Semi = findExpanded(";").front();

  const auto IntOffsets = Code.range("i");
  EXPECT_EQ(Int.range(SM),
            FileRange(MainFile, IntOffsets.Begin, IntOffsets.End));

  const auto StatementOffsets = Code.range("all");
  EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
            FileRange(MainFile, StatementOffsets.Begin, StatementOffsets.End));
  // We don't test assertion failures because death tests are slow.
}
|
|
|
|
// Checks that TokenBuffer::macroExpansions() reports the annotated macro name
// tokens of the main file, in source order.
TEST_F(TokenBufferTest, macroExpansions) {
  llvm::Annotations Code(R"cpp(
#define FOO B
#define FOO2 BA
#define CALL(X) int X
#define G CALL(FOO2)
int B;
$macro[[FOO]];
$macro[[CALL]](A);
$macro[[G]];
)cpp");
  recordTokens(Code.code());

  auto &SM = *SourceMgr;
  const FileID MainFile = SM.getMainFileID();

  // What the buffer reports.
  std::vector<FileRange> ActualMacroRanges;
  for (const syntax::Token *MacroName : Buffer.macroExpansions(MainFile))
    ActualMacroRanges.push_back(MacroName->range(SM));

  // What the annotations say.
  std::vector<FileRange> ExpectedMacroRanges;
  for (const auto &Annotated : Code.ranges("macro"))
    ExpectedMacroRanges.push_back(
        FileRange(MainFile, Annotated.Begin, Annotated.End));

  EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
}
|
|
} // namespace
|