forked from OSchip/llvm-project
Implement comment saving mode: the -C and -CC options.
llvm-svn: 38783
This commit is contained in:
parent
2be4115465
commit
457fc15bc5
|
@ -99,6 +99,11 @@ static void OutputString(const char *Ptr, unsigned Size) {
|
|||
|
||||
static cl::opt<bool>
|
||||
DisableLineMarkers("P", cl::desc("Disable linemarker output in -E mode"));
|
||||
static cl::opt<bool>
|
||||
EnableCommentOutput("C", cl::desc("Enable comment output in -E mode"));
|
||||
static cl::opt<bool>
|
||||
EnableMacroCommentOutput("CC", cl::desc("Enable comment output in -E mode, "
|
||||
"even from macro expansions"));
|
||||
|
||||
static unsigned EModeCurLine;
|
||||
static std::string EModeCurFilename;
|
||||
|
@ -357,7 +362,12 @@ static bool AvoidConcat(const LexerToken &PrevTok, const LexerToken &Tok,
|
|||
|
||||
/// DoPrintPreprocessedInput - This implements -E mode.
|
||||
///
|
||||
void clang::DoPrintPreprocessedInput(Preprocessor &PP) {
|
||||
void clang::DoPrintPreprocessedInput(Preprocessor &PP, LangOptions &Options) {
|
||||
if (EnableCommentOutput) // -C specified?
|
||||
Options.KeepComments = 1;
|
||||
if (EnableMacroCommentOutput) // -CC specified?
|
||||
Options.KeepComments = Options.KeepMacroComments = 1;
|
||||
|
||||
InitOutputBuffer();
|
||||
|
||||
LexerToken Tok, PrevTok;
|
||||
|
|
|
@ -703,7 +703,7 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
|
||||
case PrintPreprocessedInput: // -E mode.
|
||||
DoPrintPreprocessedInput(PP);
|
||||
DoPrintPreprocessedInput(PP, Options);
|
||||
break;
|
||||
|
||||
case DumpTokens: { // Token dump mode.
|
||||
|
|
|
@ -16,10 +16,11 @@
|
|||
|
||||
namespace llvm {
|
||||
namespace clang {
|
||||
class Preprocessor;
|
||||
class Preprocessor;
|
||||
class LangOptions;
|
||||
|
||||
/// DoPrintPreprocessedInput - Implement -E mode.
|
||||
void DoPrintPreprocessedInput(Preprocessor &PP);
|
||||
void DoPrintPreprocessedInput(Preprocessor &PP, LangOptions &Options);
|
||||
|
||||
} // end namespace clang
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -65,6 +65,9 @@ Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp,
|
|||
// to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
|
||||
// or otherwise skipping over tokens.
|
||||
LexingRawMode = false;
|
||||
|
||||
// Default to keeping comments if requested.
|
||||
KeepCommentMode = Features.KeepComments;
|
||||
}
|
||||
|
||||
/// Stringify - Convert the specified string into a C string, with surrounding
|
||||
|
@ -587,13 +590,15 @@ void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) {
|
|||
|
||||
// If the next token is obviously a // or /* */ comment, skip it efficiently
|
||||
// too (without going through the big switch stmt).
|
||||
if (Char == '/' && CurPtr[1] == '/') {
|
||||
if (Char == '/' && CurPtr[1] == '/' && !KeepCommentMode) {
|
||||
BufferPtr = CurPtr;
|
||||
return SkipBCPLComment(Result, CurPtr+1);
|
||||
SkipBCPLComment(Result, CurPtr+1);
|
||||
return;
|
||||
}
|
||||
if (Char == '/' && CurPtr[1] == '*') {
|
||||
if (Char == '/' && CurPtr[1] == '*' && !KeepCommentMode) {
|
||||
BufferPtr = CurPtr;
|
||||
return SkipBlockComment(Result, CurPtr+2);
|
||||
SkipBlockComment(Result, CurPtr+2);
|
||||
return;
|
||||
}
|
||||
BufferPtr = CurPtr;
|
||||
}
|
||||
|
@ -601,7 +606,7 @@ void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) {
|
|||
// SkipBCPLComment - We have just read the // characters from input. Skip until
|
||||
// we find the newline character that terminates the comment.  Then update
|
||||
/// BufferPtr and return.
|
||||
void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
|
||||
bool Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
|
||||
// If BCPL comments aren't explicitly enabled for this language, emit an
|
||||
// extension warning.
|
||||
if (!Features.BCPLComment) {
|
||||
|
@ -648,16 +653,20 @@ void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
|
|||
}
|
||||
}
|
||||
|
||||
if (CurPtr == BufferEnd+1) goto FoundEOF;
|
||||
if (CurPtr == BufferEnd+1) { --CurPtr; break; }
|
||||
} while (C != '\n' && C != '\r');
|
||||
|
||||
// Found and did not consume a newline.
|
||||
// Found but did not consume the newline.
|
||||
|
||||
// If we are returning comments as tokens, return this comment as a token.
|
||||
if (KeepCommentMode)
|
||||
return SaveBCPLComment(Result, CurPtr);
|
||||
|
||||
// If we are inside a preprocessor directive and we see the end of line,
|
||||
// return immediately, so that the lexer can return this as an EOM token.
|
||||
if (ParsingPreprocessorDirective) {
|
||||
if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
|
||||
BufferPtr = CurPtr;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Otherwise, eat the \n character. We don't care if this is a \n\r or
|
||||
|
@ -674,15 +683,33 @@ void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
|
|||
// big switch, handle it efficiently now.
|
||||
if (isWhitespace(*CurPtr)) {
|
||||
Result.SetFlag(LexerToken::LeadingSpace);
|
||||
return SkipWhitespace(Result, CurPtr+1);
|
||||
SkipWhitespace(Result, CurPtr+1);
|
||||
return true;
|
||||
}
|
||||
|
||||
BufferPtr = CurPtr;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
FoundEOF: // If we ran off the end of the buffer, return EOF.
|
||||
BufferPtr = CurPtr-1;
|
||||
return;
|
||||
/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
|
||||
/// an appropriate way and return it.
|
||||
bool Lexer::SaveBCPLComment(LexerToken &Result, const char *CurPtr) {
|
||||
Result.SetKind(tok::comment);
|
||||
FormTokenWithChars(Result, CurPtr);
|
||||
|
||||
// If this BCPL-style comment is in a macro definition, transmogrify it into
|
||||
// a C-style block comment.
|
||||
if (ParsingPreprocessorDirective) {
|
||||
std::string Spelling = PP.getSpelling(Result);
|
||||
assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
|
||||
Spelling[1] = '*'; // Change prefix to "/*".
|
||||
Spelling += "*/"; // add suffix.
|
||||
|
||||
Result.SetLocation(PP.CreateString(&Spelling[0], Spelling.size(),
|
||||
Result.getLocation()));
|
||||
Result.SetLength(Spelling.size());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline
|
||||
|
@ -748,7 +775,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
|
|||
/// because they cannot cause the comment to end. The only thing that can
|
||||
/// happen is the comment could end with an escaped newline between the */ end
|
||||
/// of comment.
|
||||
void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
|
||||
bool Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
|
||||
// Scan one character past where we should, looking for a '/' character. Once
|
||||
// we find it, check to see if it was preceded by a *.  This common
|
||||
// optimization helps people who like to put a lot of * characters in their
|
||||
|
@ -757,7 +784,7 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
|
|||
if (C == 0 && CurPtr == BufferEnd+1) {
|
||||
Diag(BufferPtr, diag::err_unterminated_block_comment);
|
||||
BufferPtr = CurPtr-1;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
|
@ -789,22 +816,31 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
|
|||
// after the /*, but this would involve lexing a lot of what really is the
|
||||
// comment, which surely would confuse the parser.
|
||||
BufferPtr = CurPtr-1;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
C = *CurPtr++;
|
||||
}
|
||||
|
||||
// If we are returning comments as tokens, return this comment as a token.
|
||||
if (KeepCommentMode) {
|
||||
Result.SetKind(tok::comment);
|
||||
FormTokenWithChars(Result, CurPtr);
|
||||
return false;
|
||||
}
|
||||
|
||||
// It is common for the tokens immediately after a /**/ comment to be
|
||||
// whitespace. Instead of going through the big switch, handle it
|
||||
// efficiently now.
|
||||
if (isHorizontalWhitespace(*CurPtr)) {
|
||||
Result.SetFlag(LexerToken::LeadingSpace);
|
||||
return SkipWhitespace(Result, CurPtr+1);
|
||||
SkipWhitespace(Result, CurPtr+1);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Otherwise, just return so that the next character will be lexed as a token.
|
||||
BufferPtr = CurPtr;
|
||||
Result.SetFlag(LexerToken::LeadingSpace);
|
||||
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -920,6 +956,9 @@ bool Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) {
|
|||
Result.SetKind(tok::eom);
|
||||
// Update the location of token as well as BufferPtr.
|
||||
FormTokenWithChars(Result, CurPtr);
|
||||
|
||||
// Restore comment saving mode, in case it was disabled for directive.
|
||||
KeepCommentMode = Features.KeepComments;
|
||||
return true; // Have a token.
|
||||
}
|
||||
|
||||
|
@ -1035,6 +1074,9 @@ LexNextToken:
|
|||
// Done parsing the "line".
|
||||
ParsingPreprocessorDirective = false;
|
||||
|
||||
// Restore comment saving mode, in case it was disabled for directive.
|
||||
KeepCommentMode = Features.KeepComments;
|
||||
|
||||
// Since we consumed a newline, we are back at the start of a line.
|
||||
IsAtStartOfLine = true;
|
||||
|
||||
|
@ -1211,13 +1253,13 @@ LexNextToken:
|
|||
// 6.4.9: Comments
|
||||
Char = getCharAndSize(CurPtr, SizeTmp);
|
||||
if (Char == '/') { // BCPL comment.
|
||||
Result.SetFlag(LexerToken::LeadingSpace);
|
||||
SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
|
||||
goto LexNextToken; // GCC isn't tail call eliminating.
|
||||
if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
|
||||
goto LexNextToken; // GCC isn't tail call eliminating.
|
||||
return; // KeepCommentMode
|
||||
} else if (Char == '*') { // /**/ comment.
|
||||
Result.SetFlag(LexerToken::LeadingSpace);
|
||||
SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
|
||||
goto LexNextToken; // GCC isn't tail call eliminating.
|
||||
if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
|
||||
goto LexNextToken; // GCC isn't tail call eliminating.
|
||||
return; // KeepCommentMode
|
||||
} else if (Char == '=') {
|
||||
Result.SetKind(tok::slashequal);
|
||||
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
|
||||
|
|
|
@ -755,6 +755,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(LexerToken &MacroName,
|
|||
return 0;
|
||||
}
|
||||
// Otherwise, continue to add the tokens to this variable argument.
|
||||
} else if (Tok.getKind() == tok::comment && !Features.KeepMacroComments) {
|
||||
// If this is a comment token in the argument list and we're just in
|
||||
// -C mode (not -CC mode), discard the comment.
|
||||
continue;
|
||||
}
|
||||
|
||||
ArgTokens.push_back(Tok);
|
||||
|
@ -1221,6 +1225,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
|||
// directive mode. Tell the lexer this so any newlines we see will be
|
||||
// converted into an EOM token (this terminates the macro).
|
||||
CurLexer->ParsingPreprocessorDirective = true;
|
||||
CurLexer->KeepCommentMode = false;
|
||||
|
||||
|
||||
// Read the next token, the directive flavor.
|
||||
LexUnexpandedToken(Tok);
|
||||
|
@ -1229,6 +1235,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
|||
// something bogus), skip it.
|
||||
if (Tok.getKind() != tok::identifier) {
|
||||
CurLexer->ParsingPreprocessorDirective = false;
|
||||
// Restore comment saving mode.
|
||||
CurLexer->KeepCommentMode = Features.KeepComments;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1242,6 +1250,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
|||
if (FirstChar >= 'a' && FirstChar <= 'z' &&
|
||||
FirstChar != 'i' && FirstChar != 'e') {
|
||||
CurLexer->ParsingPreprocessorDirective = false;
|
||||
// Restore comment saving mode.
|
||||
CurLexer->KeepCommentMode = Features.KeepComments;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1261,6 +1271,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
|||
IdLen = DirectiveStr.size();
|
||||
if (IdLen >= 20) {
|
||||
CurLexer->ParsingPreprocessorDirective = false;
|
||||
// Restore comment saving mode.
|
||||
CurLexer->KeepCommentMode = Features.KeepComments;
|
||||
continue;
|
||||
}
|
||||
memcpy(Directive, &DirectiveStr[0], IdLen);
|
||||
|
@ -1339,6 +1351,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
|||
}
|
||||
|
||||
CurLexer->ParsingPreprocessorDirective = false;
|
||||
// Restore comment saving mode.
|
||||
CurLexer->KeepCommentMode = Features.KeepComments;
|
||||
}
|
||||
|
||||
// Finally, if we are out of the conditional (saw an #endif or ran off the end
|
||||
|
@ -1698,6 +1712,10 @@ void Preprocessor::HandleDefineDirective(LexerToken &DefineTok) {
|
|||
if (MacroNameTok.getKind() == tok::eom)
|
||||
return;
|
||||
|
||||
// If we are supposed to keep comments in #defines, reenable comment saving
|
||||
// mode.
|
||||
CurLexer->KeepCommentMode = Features.KeepMacroComments;
|
||||
|
||||
MacroInfo *MI = new MacroInfo(MacroNameTok.getLocation());
|
||||
|
||||
LexerToken Tok;
|
||||
|
|
|
@ -67,7 +67,6 @@ Lexer:
|
|||
Preprocessor:
|
||||
* #assert/#unassert
|
||||
* #line / #file directives
|
||||
* -C output mode in -E mode.
|
||||
* MSExtension: "L#param" stringizes to a wide string literal.
|
||||
|
||||
Traditional Preprocessor:
|
||||
|
|
|
@ -28,6 +28,9 @@ TOK(unknown) // Not a token.
|
|||
TOK(eof) // End of file.
|
||||
TOK(eom) // End of macro (end of line inside a macro).
|
||||
|
||||
// C99 6.4.9: Comments.
|
||||
TOK(comment) // Comment (only in -E -C[C] mode)
|
||||
|
||||
// C99 6.4.2: Identifiers.
|
||||
TOK(identifier) // abcde123
|
||||
|
||||
|
|
|
@ -26,23 +26,27 @@ class Preprocessor;
|
|||
class SourceBuffer;
|
||||
|
||||
struct LangOptions {
|
||||
unsigned Trigraphs : 1; // Trigraphs in source files.
|
||||
unsigned BCPLComment : 1; // BCPL-style // comments.
|
||||
unsigned DollarIdents : 1; // '$' allowed in identifiers.
|
||||
unsigned Digraphs : 1; // When added to C? C99?
|
||||
unsigned HexFloats : 1; // C99 Hexadecimal float constants.
|
||||
unsigned C99 : 1; // C99 Support
|
||||
unsigned Microsoft : 1; // Microsoft extensions.
|
||||
unsigned CPlusPlus : 1; // C++ Support
|
||||
unsigned CPPMinMax : 1; // C++ <?=, >?= tokens.
|
||||
unsigned NoExtensions : 1; // All extensions are disabled, strict mode.
|
||||
unsigned Trigraphs : 1; // Trigraphs in source files.
|
||||
unsigned BCPLComment : 1; // BCPL-style // comments.
|
||||
unsigned DollarIdents : 1; // '$' allowed in identifiers.
|
||||
unsigned Digraphs : 1; // When added to C? C99?
|
||||
unsigned HexFloats : 1; // C99 Hexadecimal float constants.
|
||||
unsigned C99 : 1; // C99 Support
|
||||
unsigned Microsoft : 1; // Microsoft extensions.
|
||||
unsigned CPlusPlus : 1; // C++ Support
|
||||
unsigned CPPMinMax : 1; // C++ <?=, >?= tokens.
|
||||
unsigned NoExtensions : 1; // All extensions are disabled, strict mode.
|
||||
|
||||
unsigned ObjC1 : 1; // Objective C 1 support enabled.
|
||||
unsigned ObjC2 : 1; // Objective C 2 support enabled (implies ObjC1).
|
||||
unsigned ObjC1 : 1; // Objective C 1 support enabled.
|
||||
unsigned ObjC2 : 1; // Objective C 2 support enabled.
|
||||
|
||||
unsigned KeepComments : 1; // Keep comments ("-C") mode.
|
||||
unsigned KeepMacroComments : 1; // Keep macro-exp comments ("-CC") mode.
|
||||
|
||||
LangOptions() {
|
||||
Trigraphs = BCPLComment = DollarIdents = Digraphs = ObjC1 = ObjC2 = 0;
|
||||
C99 = Microsoft = CPlusPlus = CPPMinMax = NoExtensions = 0;
|
||||
KeepComments = KeepMacroComments = 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -87,6 +91,10 @@ class Lexer {
|
|||
/// on an unterminated '/*' comment.
|
||||
bool LexingRawMode;
|
||||
|
||||
/// KeepCommentMode - The lexer can optionally keep C & BCPL-style comments,
|
||||
/// and return them as tokens. This is used for -C and -CC modes.
|
||||
bool KeepCommentMode;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Context that changes as the file is lexed.
|
||||
// NOTE: any state that mutates when in raw mode must have save/restore code
|
||||
|
@ -353,9 +361,9 @@ private:
|
|||
bool LexEndOfFile (LexerToken &Result, const char *CurPtr);
|
||||
|
||||
void SkipWhitespace (LexerToken &Result, const char *CurPtr);
|
||||
void SkipBCPLComment (LexerToken &Result, const char *CurPtr);
|
||||
void SkipBlockComment (LexerToken &Result, const char *CurPtr);
|
||||
|
||||
bool SkipBCPLComment (LexerToken &Result, const char *CurPtr);
|
||||
bool SkipBlockComment (LexerToken &Result, const char *CurPtr);
|
||||
bool SaveBCPLComment (LexerToken &Result, const char *CurPtr);
|
||||
|
||||
/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
|
||||
/// (potentially) macro expand the filename. If the sequence parsed is not
|
||||
|
|
Loading…
Reference in New Issue