Experimental stab at using relexing to identify preprocessor

directives while annotating tokens in CIndex. This functionality
should probably be factored out of this routine, but we're not there
yet. 

llvm-svn: 98786
This commit is contained in:
Douglas Gregor 2010-03-18 00:42:48 +00:00
parent 8575110ae1
commit 92a524fd47
6 changed files with 185 additions and 9 deletions

View File

@ -796,7 +796,13 @@ enum CXCursorKind {
CXCursor_IBActionAttr = 401,
CXCursor_IBOutletAttr = 402,
CXCursor_LastAttr = CXCursor_IBOutletAttr
CXCursor_LastAttr = CXCursor_IBOutletAttr,
/* Preprocessing */
CXCursor_PreprocessingDirective = 500,
CXCursor_FirstPreprocessing = CXCursor_PreprocessingDirective,
CXCursor_LastPreprocessing = CXCursor_PreprocessingDirective
};
/**
@ -888,6 +894,12 @@ CINDEX_LINKAGE unsigned clang_isInvalid(enum CXCursorKind);
*/
CINDEX_LINKAGE unsigned clang_isTranslationUnit(enum CXCursorKind);
/**
* \brief Determine whether the given cursor kind represents a preprocessing
* element, such as a preprocessor directive or macro instantiation.
*
* \returns non-zero if the kind lies in the range
* [CXCursor_FirstPreprocessing, CXCursor_LastPreprocessing], zero otherwise.
*/
CINDEX_LINKAGE unsigned clang_isPreprocessing(enum CXCursorKind);
/***
* \brief Determine whether the given cursor represents a currently
* unexposed piece of the AST (e.g., CXCursor_UnexposedStmt).

View File

@ -0,0 +1,31 @@
#define BAR baz
#define WIBBLE(X, Y)
WIBBLE(int, float)
int BAR;
#include "foo.h"
// RUN: c-index-test -test-annotate-tokens=%s:1:1:6:1 -I%S/Inputs %s | FileCheck %s
// CHECK: Punctuation: "#" [1:1 - 1:2] preprocessing directive=
// CHECK: Identifier: "define" [1:2 - 1:8] preprocessing directive=
// CHECK: Identifier: "BAR" [1:9 - 1:12] preprocessing directive=
// CHECK: Identifier: "baz" [1:13 - 1:16] preprocessing directive=
// CHECK: Punctuation: "#" [2:1 - 2:2] preprocessing directive=
// CHECK: Identifier: "define" [2:2 - 2:8] preprocessing directive=
// CHECK: Identifier: "WIBBLE" [2:9 - 2:15] preprocessing directive=
// CHECK: Punctuation: "(" [2:15 - 2:16] preprocessing directive=
// CHECK: Identifier: "X" [2:16 - 2:17] preprocessing directive=
// CHECK: Punctuation: "," [2:17 - 2:18] preprocessing directive=
// CHECK: Identifier: "Y" [2:19 - 2:20] preprocessing directive=
// CHECK: Punctuation: ")" [2:20 - 2:21] preprocessing directive=
// CHECK: Identifier: "WIBBLE" [3:1 - 3:7]
// CHECK: Punctuation: "(" [3:7 - 3:8]
// CHECK: Keyword: "int" [3:8 - 3:11]
// CHECK: Punctuation: "," [3:11 - 3:12]
// CHECK: Keyword: "float" [3:13 - 3:18]
// CHECK: Punctuation: ")" [3:18 - 3:19]
// CHECK: Keyword: "int" [4:1 - 4:4]
// CHECK: Identifier: "BAR" [4:5 - 4:8]
// CHECK: Punctuation: ";" [4:8 - 4:9]
// CHECK: Punctuation: "#" [5:1 - 5:2] preprocessing directive=
// CHECK: Identifier: "include" [5:2 - 5:9] preprocessing directive=
// CHECK: Literal: ""foo.h"" [5:10 - 5:17] preprocessing directive=

View File

@ -1518,8 +1518,10 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
return createCXString("UnexposedAttr");
case CXCursor_IBActionAttr:
return createCXString("attribute(ibaction)");
case CXCursor_IBOutletAttr:
return createCXString("attribute(iboutlet)");
case CXCursor_IBOutletAttr:
return createCXString("attribute(iboutlet)");
case CXCursor_PreprocessingDirective:
return createCXString("preprocessing directive");
}
llvm_unreachable("Unhandled CXCursorKind");
@ -1590,6 +1592,10 @@ unsigned clang_isTranslationUnit(enum CXCursorKind K) {
return K == CXCursor_TranslationUnit;
}
/// Report whether \p K is one of the preprocessing cursor kinds.
///
/// The preprocessing kinds form one contiguous range of the enumeration,
/// bounded by CXCursor_FirstPreprocessing and CXCursor_LastPreprocessing.
///
/// \returns 1 when \p K lies inside that range (bounds included), 0 otherwise.
unsigned clang_isPreprocessing(enum CXCursorKind K) {
  // Anything below the first preprocessing kind cannot qualify.
  if (K < CXCursor_FirstPreprocessing)
    return 0;
  return K <= CXCursor_LastPreprocessing ? 1 : 0;
}
unsigned clang_isUnexposed(enum CXCursorKind K) {
switch (K) {
case CXCursor_UnexposedDecl:
@ -1642,6 +1648,11 @@ CXSourceLocation clang_getCursorLocation(CXCursor C) {
return cxloc::translateSourceLocation(getCursorContext(C),
getLocationFromExpr(getCursorExpr(C)));
if (C.kind == CXCursor_PreprocessingDirective) {
SourceLocation L = cxcursor::getCursorPreprocessingDirective(C).getBegin();
return cxloc::translateSourceLocation(getCursorContext(C), L);
}
if (!getCursorDecl(C))
return clang_getNullLocation();
@ -1693,6 +1704,11 @@ CXSourceRange clang_getCursorExtent(CXCursor C) {
return cxloc::translateSourceRange(getCursorContext(C),
getCursorStmt(C)->getSourceRange());
if (C.kind == CXCursor_PreprocessingDirective) {
SourceRange R = cxcursor::getCursorPreprocessingDirective(C);
return cxloc::translateSourceRange(getCursorContext(C), R);
}
if (!getCursorDecl(C))
return clang_getNullRange();
@ -2216,7 +2232,8 @@ void clang_annotateTokens(CXTranslationUnit TU,
ASTUnit::ConcurrencyCheck Check(*CXXUnit);
// Annotate all of the source locations in the region of interest that map
// Annotate all of the source locations in the region of interest that map to
// a specific cursor.
SourceRange RegionOfInterest;
RegionOfInterest.setBegin(
cxloc::translateSourceLocation(clang_getTokenLocation(TU, Tokens[0])));
@ -2224,23 +2241,114 @@ void clang_annotateTokens(CXTranslationUnit TU,
= cxloc::translateSourceLocation(clang_getTokenLocation(TU,
Tokens[NumTokens - 1]));
RegionOfInterest.setEnd(CXXUnit->getPreprocessor().getLocForEndOfToken(End));
// FIXME: Would be great to have a "hint" cursor, then walk from that
// hint cursor upward until we find a cursor whose source range encloses
// the region of interest, rather than starting from the translation unit.
AnnotateTokensData Annotated;
CXCursor Parent = clang_getTranslationUnitCursor(CXXUnit);
CursorVisitor AnnotateVis(CXXUnit, AnnotateTokensVisitor, &Annotated,
Decl::MaxPCHLevel, RegionOfInterest);
AnnotateVis.VisitChildren(Parent);
// Look for macro instantiations and preprocessing directives in the
// source range containing the annotated tokens. We do this by re-lexing the
// tokens in the source range.
SourceManager &SourceMgr = CXXUnit->getSourceManager();
std::pair<FileID, unsigned> BeginLocInfo
= SourceMgr.getDecomposedLoc(RegionOfInterest.getBegin());
std::pair<FileID, unsigned> EndLocInfo
= SourceMgr.getDecomposedLoc(RegionOfInterest.getEnd());
bool RelexOkay = true;
// Cannot re-tokenize across files.
if (BeginLocInfo.first != EndLocInfo.first)
RelexOkay = false;
llvm::StringRef Buffer;
if (RelexOkay) {
// Create a lexer
bool Invalid = false;
Buffer = SourceMgr.getBufferData(BeginLocInfo.first, &Invalid);
if (Invalid)
RelexOkay = false;
}
// Re-lex the region of interest in raw mode. Every token that belongs to a
// preprocessing directive is recorded in Annotated under a
// CXCursor_PreprocessingDirective cursor; all other tokens are collected in
// TokenStream.
if (RelexOkay) {
// NOTE(review): the lexer is given the whole file buffer plus a pointer at the
// region's begin offset, presumably so that lexing starts at the region --
// confirm against the Lexer constructor.
Lexer Lex(SourceMgr.getLocForStartOfFile(BeginLocInfo.first),
CXXUnit->getASTContext().getLangOptions(),
Buffer.begin(), Buffer.data() + BeginLocInfo.second, Buffer.end());
// NOTE(review): presumably makes the raw lexer return comments as tokens
// instead of skipping them -- confirm.
Lex.SetCommentRetentionState(true);
// Lex tokens in raw mode until we hit the end of the range, to avoid
// entering #includes or expanding macros.
// NOTE(review): nothing in the visible code reads TokenStream after it is
// filled below.
std::vector<Token> TokenStream;
// Pointer to the region's end offset within the buffer; the loop below stops
// once the lexer has moved past it.
const char *EffectiveBufferEnd = Buffer.data() + EndLocInfo.second;
Preprocessor &PP = CXXUnit->getPreprocessor();
while (Lex.getBufferLocation() <= EffectiveBufferEnd) {
Token Tok;
Lex.LexFromRawLexer(Tok);
// Re-entry point for a token that was already lexed while consuming the
// previous directive (see the goto below).
reprocess:
if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
// We have found a preprocessing directive. Gobble it up so that we
// don't see it while preprocessing these tokens later, but keep track of
// all of the token locations inside this preprocessing directive so that
// we can annotate them appropriately.
//
// FIXME: Some simple tests here could identify macro definitions and
// #undefs, to provide specific cursor kinds for those.
std::vector<SourceLocation> Locations;
// Collect the '#' and every following token up to, but not including, the
// first token that starts a new line (or the end of file).
do {
Locations.push_back(Tok.getLocation());
Lex.LexFromRawLexer(Tok);
} while (!Tok.isAtStartOfLine() && !Tok.is(tok::eof));
using namespace cxcursor;
// The cursor's range runs from the location of the '#' to the location of the
// last token collected for the directive.
CXCursor Cursor
= MakePreprocessingDirectiveCursor(SourceRange(Locations.front(),
Locations.back()),
CXXUnit);
// Key each directive token by its raw location encoding so the annotation
// lookup can find the cursor for that token.
for (unsigned I = 0, N = Locations.size(); I != N; ++I) {
Annotated[Locations[I].getRawEncoding()] = Cursor;
}
if (Tok.is(tok::eof))
break;
// Tok is the first token after the directive and has not been handled yet;
// jump back so it is examined without lexing a new token first (it may
// itself begin another directive).
if (Tok.isAtStartOfLine())
goto reprocess;
// NOTE(review): the do/while above only exits on eof or a start-of-line
// token, and both cases are handled above, so this continue looks
// unreachable.
continue;
}
// If this is a ## token, change its kind to unknown so that repreprocessing
// it will not produce an error.
if (Tok.is(tok::hashhash))
Tok.setKind(tok::unknown);
// If this raw token is an identifier, the raw lexer won't have looked up
// the corresponding identifier info for it. Do this now so that it will be
// macro expanded when we re-preprocess it.
if (Tok.is(tok::identifier)) {
// Change the kind of this identifier to the appropriate token kind, e.g.
// turning "for" into a keyword.
Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID());
}
TokenStream.push_back(Tok);
// The eof token is stored before stopping, so it ends TokenStream.
if (Tok.is(tok::eof))
break;
}
}
for (unsigned I = 0; I != NumTokens; ++I) {
// Determine whether we saw a cursor at this token's location.
AnnotateTokensData::iterator Pos = Annotated.find(Tokens[I].int_data[1]);
if (Pos == Annotated.end())
continue;
Cursors[I] = Pos->second;
}
}
}
void clang_disposeTokens(CXTranslationUnit TU,

View File

@ -70,6 +70,7 @@ _clang_isCursorDefinition
_clang_isDeclaration
_clang_isExpression
_clang_isInvalid
_clang_isPreprocessing
_clang_isReference
_clang_isStatement
_clang_isTranslationUnit

View File

@ -296,6 +296,24 @@ cxcursor::getCursorTypeRef(CXCursor C) {
reinterpret_cast<uintptr_t>(C.data[1])));
}
/// Build a CXCursor_PreprocessingDirective cursor for the given range.
///
/// The raw encodings of the range's begin and end locations are packed into
/// the cursor's first two data slots, and \p TU into the third.
CXCursor cxcursor::MakePreprocessingDirectiveCursor(SourceRange Range,
                                                    ASTUnit *TU) {
  // Source locations are stored as raw encodings disguised as pointers.
  void *BeginData = reinterpret_cast<void *>(Range.getBegin().getRawEncoding());
  void *EndData = reinterpret_cast<void *>(Range.getEnd().getRawEncoding());

  CXCursor Result = { CXCursor_PreprocessingDirective,
                      { BeginData, EndData, TU } };
  return Result;
}
/// Recover the source range stored in a preprocessing directive cursor.
///
/// \p C must have kind CXCursor_PreprocessingDirective (checked by assert).
/// Its first two data slots are read back as the raw encodings of the range's
/// begin and end locations.
SourceRange cxcursor::getCursorPreprocessingDirective(CXCursor C) {
  assert(C.kind == CXCursor_PreprocessingDirective);

  // Undo the pointer disguise applied when the cursor was built.
  const uintptr_t RawBegin = reinterpret_cast<uintptr_t>(C.data[0]);
  const uintptr_t RawEnd = reinterpret_cast<uintptr_t>(C.data[1]);

  return SourceRange(SourceLocation::getFromRawEncoding(RawBegin),
                     SourceLocation::getFromRawEncoding(RawEnd));
}
/// Return the cursor's first data slot reinterpreted as a declaration pointer.
Decl *cxcursor::getCursorDecl(CXCursor Cursor) {
  void *FirstSlot = Cursor.data[0];
  return static_cast<Decl *>(FirstSlot);
}

View File

@ -73,6 +73,12 @@ CXCursor MakeCursorTypeRef(TypeDecl *Type, SourceLocation Loc, ASTUnit *TU);
/// and optionally the location where the reference occurred.
std::pair<TypeDecl *, SourceLocation> getCursorTypeRef(CXCursor C);
/// \brief Create a preprocessing directive cursor.
///
/// \param Range the source range of the directive; its begin and end
/// locations are stored in the cursor as raw encodings.
/// \param TU the translation unit stored alongside the range in the cursor.
///
/// \returns a cursor of kind CXCursor_PreprocessingDirective.
CXCursor MakePreprocessingDirectiveCursor(SourceRange Range, ASTUnit *TU);
/// \brief Unpack a given preprocessing directive to retrieve its source range.
///
/// \param C a cursor whose kind must be CXCursor_PreprocessingDirective (this
/// is asserted).
///
/// \returns the source range rebuilt from the begin and end locations stored
/// in the cursor.
SourceRange getCursorPreprocessingDirective(CXCursor C);
Decl *getCursorDecl(CXCursor Cursor);
Expr *getCursorExpr(CXCursor Cursor);
Stmt *getCursorStmt(CXCursor Cursor);