Comment parsing: refactor handling of command markers in AST

* Use the term 'command marker', because the semantics of 'backslash' and 'at'
  commands are the same.  (Talking about 'at commands' makes them look like a
  special entity.)

* Sink the flag down into bitfields, reducing the size of AST nodes.

* Change the flag into an enum for clarity.  Boolean function parameters are
  not very clear.

* Add unittests for new tok::at_command tokens.

llvm-svn: 176461
This commit is contained in:
Dmitri Gribenko 2013-03-04 23:06:15 +00:00
parent 208b5beb60
commit bcf7f4d3bb
7 changed files with 154 additions and 69 deletions

View File

@ -28,6 +28,26 @@ class TemplateParameterList;
namespace comments {
class FullComment;
/// Describes the syntax that was used in a documentation command.
///
/// Exact values of this enumeration are important because they used to select
/// parts of diagnostic messages. Audit diagnostics before changing or adding
/// a new value.
enum CommandMarkerKind {
/// Command started with a backslash character:
/// \code
/// \foo
/// \endcode
CMK_Backslash = 0,
/// Command started with an 'at' character:
/// \code
/// @foo
/// \endcode
CMK_At = 1
};
/// Any part of the comment.
/// Abstract class.
class Comment {
@ -110,8 +130,12 @@ protected:
unsigned : NumCommentBits;
unsigned CommandID : 8;
/// Describes the syntax that was used in a documentation command.
/// Contains values from CommandMarkerKind enum.
unsigned CommandMarker : 1;
};
enum { NumBlockCommandCommentBits = NumCommentBits + 8 };
enum { NumBlockCommandCommentBits = NumCommentBits + 9 };
class ParamCommandCommentBitfields {
friend class ParamCommandComment;
@ -570,30 +594,29 @@ protected:
/// Paragraph argument.
ParagraphComment *Paragraph;
/// Header Doc command, if true
bool AtCommand;
BlockCommandComment(CommentKind K,
SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand) :
CommandMarkerKind CommandMarker) :
BlockContentComment(K, LocBegin, LocEnd),
Paragraph(NULL), AtCommand(AtCommand) {
Paragraph(NULL) {
setLocation(getCommandNameBeginLoc());
BlockCommandCommentBits.CommandID = CommandID;
BlockCommandCommentBits.CommandMarker = CommandMarker;
}
public:
BlockCommandComment(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand) :
CommandMarkerKind CommandMarker) :
BlockContentComment(BlockCommandCommentKind, LocBegin, LocEnd),
Paragraph(NULL), AtCommand(AtCommand) {
Paragraph(NULL) {
setLocation(getCommandNameBeginLoc());
BlockCommandCommentBits.CommandID = CommandID;
BlockCommandCommentBits.CommandMarker = CommandMarker;
}
static bool classof(const Comment *C) {
@ -662,9 +685,10 @@ public:
if (NewLocEnd.isValid())
setSourceRange(SourceRange(getLocStart(), NewLocEnd));
}
bool getAtCommand() const LLVM_READONLY {
return AtCommand;
CommandMarkerKind getCommandMarker() const LLVM_READONLY {
return static_cast<CommandMarkerKind>(
BlockCommandCommentBits.CommandMarker);
}
};
@ -680,9 +704,9 @@ public:
ParamCommandComment(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand) :
CommandMarkerKind CommandMarker) :
BlockCommandComment(ParamCommandCommentKind, LocBegin, LocEnd,
CommandID, AtCommand),
CommandID, CommandMarker),
ParamIndex(InvalidParamIndex) {
ParamCommandCommentBits.Direction = In;
ParamCommandCommentBits.IsDirectionExplicit = false;
@ -763,9 +787,9 @@ public:
TParamCommandComment(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand) :
CommandMarkerKind CommandMarker) :
BlockCommandComment(TParamCommandCommentKind, LocBegin, LocEnd, CommandID,
AtCommand)
CommandMarker)
{ }
static bool classof(const Comment *C) {
@ -846,7 +870,8 @@ public:
SourceLocation LocEnd,
unsigned CommandID) :
BlockCommandComment(VerbatimBlockCommentKind,
LocBegin, LocEnd, CommandID, false)
LocBegin, LocEnd, CommandID,
CMK_At) // FIXME: improve source fidelity.
{ }
static bool classof(const Comment *C) {
@ -899,7 +924,8 @@ public:
StringRef Text) :
BlockCommandComment(VerbatimLineCommentKind,
LocBegin, LocEnd,
CommandID, false),
CommandID,
CMK_At), // FIXME: improve source fidelity.
Text(Text),
TextBegin(TextBegin)
{ }

View File

@ -34,9 +34,9 @@ enum TokenKind {
eof,
newline,
text,
unknown_command, // Command that does not have an ID.
backslash_command, // \Command with an ID.
at_command, // @command with an ID.
unknown_command, // Command that does not have an ID.
backslash_command, // Command with an ID, that used backslash marker.
at_command, // Command with an ID, that used 'at' marker.
verbatim_block_begin,
verbatim_block_line,
verbatim_block_end,

View File

@ -97,7 +97,7 @@ public:
BlockCommandComment *actOnBlockCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand);
CommandMarkerKind CommandMarker);
void actOnBlockCommandArgs(BlockCommandComment *Command,
ArrayRef<BlockCommandComment::Argument> Args);
@ -108,7 +108,7 @@ public:
ParamCommandComment *actOnParamCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand);
CommandMarkerKind CommandMarker);
void actOnParamCommandDirectionArg(ParamCommandComment *Command,
SourceLocation ArgLocBegin,
@ -126,7 +126,7 @@ public:
TParamCommandComment *actOnTParamCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand);
CommandMarkerKind CommandMarker);
void actOnTParamCommandParamNameArg(TParamCommandComment *Command,
SourceLocation ArgLocBegin,

View File

@ -298,7 +298,11 @@ void Lexer::lexCommentText(Token &T) {
switch(*TokenPtr) {
case '\\':
case '@': {
bool AtCommand = (*TokenPtr == '@');
// Commands that start with a backslash and commands that start with
// 'at' have equivalent semantics. But we keep information about the
// exact syntax in AST for comments.
tok::TokenKind CommandKind =
(*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
TokenPtr++;
if (TokenPtr == CommentEnd) {
formTextToken(T, TokenPtr);
@ -359,9 +363,7 @@ void Lexer::lexCommentText(Token &T) {
setupAndLexVerbatimLine(T, TokenPtr, Info);
return;
}
formTokenWithChars(T, TokenPtr,
(AtCommand ? tok::at_command
: tok::backslash_command));
formTokenWithChars(T, TokenPtr, CommandKind);
T.setCommandID(Info->getID());
return;
}

View File

@ -308,23 +308,25 @@ BlockCommandComment *Parser::parseBlockCommand() {
bool IsParam = false;
bool IsTParam = false;
const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
CommandMarkerKind CommandMarker =
Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
if (Info->IsParamCommand) {
IsParam = true;
PC = S.actOnParamCommandStart(Tok.getLocation(),
Tok.getEndLocation(),
Tok.getCommandID(),
Tok.is(tok::at_command));
CommandMarker);
} else if (Info->IsTParamCommand) {
IsTParam = true;
TPC = S.actOnTParamCommandStart(Tok.getLocation(),
Tok.getEndLocation(),
Tok.getCommandID(),
Tok.is(tok::at_command));
CommandMarker);
} else {
BC = S.actOnBlockCommandStart(Tok.getLocation(),
Tok.getEndLocation(),
Tok.getCommandID(),
Tok.is(tok::at_command));
CommandMarker);
}
consumeToken();

View File

@ -47,11 +47,13 @@ ParagraphComment *Sema::actOnParagraphComment(
return new (Allocator) ParagraphComment(Content);
}
BlockCommandComment *Sema::actOnBlockCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand) {
return new (Allocator) BlockCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
BlockCommandComment *Sema::actOnBlockCommandStart(
SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
CommandMarkerKind CommandMarker) {
return new (Allocator) BlockCommandComment(LocBegin, LocEnd, CommandID,
CommandMarker);
}
void Sema::actOnBlockCommandArgs(BlockCommandComment *Command,
@ -68,17 +70,19 @@ void Sema::actOnBlockCommandFinish(BlockCommandComment *Command,
checkDeprecatedCommand(Command);
}
ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand) {
ParamCommandComment *Sema::actOnParamCommandStart(
SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
CommandMarkerKind CommandMarker) {
ParamCommandComment *Command =
new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID,
CommandMarker);
if (!isFunctionDecl())
Diag(Command->getLocation(),
diag::warn_doc_param_not_attached_to_a_function_decl)
<< AtCommand
<< CommandMarker
<< Command->getCommandNameRange(Traits);
return Command;
@ -163,17 +167,19 @@ void Sema::actOnParamCommandFinish(ParamCommandComment *Command,
checkBlockCommandEmptyParagraph(Command);
}
TParamCommandComment *Sema::actOnTParamCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
bool AtCommand) {
TParamCommandComment *Sema::actOnTParamCommandStart(
SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
CommandMarkerKind CommandMarker) {
TParamCommandComment *Command =
new (Allocator) TParamCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
new (Allocator) TParamCommandComment(LocBegin, LocEnd, CommandID,
CommandMarker);
if (!isTemplateOrSpecialization())
Diag(Command->getLocation(),
diag::warn_doc_tparam_not_attached_to_a_template_decl)
<< AtCommand
<< CommandMarker
<< Command->getCommandNameRange(Traits);
return Command;
@ -437,7 +443,7 @@ void Sema::checkBlockCommandEmptyParagraph(BlockCommandComment *Command) {
if (!DiagLoc.isValid())
DiagLoc = Command->getCommandNameRange(Traits).getEnd();
Diag(DiagLoc, diag::warn_doc_block_command_empty_paragraph)
<< Command->getAtCommand()
<< Command->getCommandMarker()
<< Command->getCommandName(Traits)
<< Command->getSourceRange();
}
@ -465,7 +471,7 @@ void Sema::checkReturnsCommand(const BlockCommandComment *Command) {
}
Diag(Command->getLocation(),
diag::warn_doc_returns_attached_to_a_void_function)
<< Command->getAtCommand()
<< Command->getCommandMarker()
<< Command->getCommandName(Traits)
<< DiagKind
<< Command->getSourceRange();
@ -477,7 +483,7 @@ void Sema::checkReturnsCommand(const BlockCommandComment *Command) {
Diag(Command->getLocation(),
diag::warn_doc_returns_not_attached_to_a_function_decl)
<< Command->getAtCommand()
<< Command->getCommandMarker()
<< Command->getCommandName(Traits)
<< Command->getSourceRange();
}
@ -510,18 +516,18 @@ void Sema::checkBlockCommandDuplicate(const BlockCommandComment *Command) {
StringRef CommandName = Command->getCommandName(Traits);
StringRef PrevCommandName = PrevCommand->getCommandName(Traits);
Diag(Command->getLocation(), diag::warn_doc_block_command_duplicate)
<< Command->getAtCommand()
<< Command->getCommandMarker()
<< CommandName
<< Command->getSourceRange();
if (CommandName == PrevCommandName)
Diag(PrevCommand->getLocation(), diag::note_doc_block_command_previous)
<< PrevCommand->getAtCommand()
<< PrevCommand->getCommandMarker()
<< PrevCommandName
<< PrevCommand->getSourceRange();
else
Diag(PrevCommand->getLocation(),
diag::note_doc_block_command_previous_alias)
<< PrevCommand->getAtCommand()
<< PrevCommand->getCommandMarker()
<< PrevCommandName
<< CommandName;
}

View File

@ -301,8 +301,10 @@ TEST_F(CommentLexerTest, DoxygenCommand3) {
// Doxygen escape sequences.
TEST_F(CommentLexerTest, DoxygenCommand4) {
const char *Source =
"/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::";
const char *Sources[] = {
"/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
"/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
};
const char *Text[] = {
" ",
"\\", " ", "@", " ", "&", " ", "$", " ", "#", " ",
@ -310,16 +312,18 @@ TEST_F(CommentLexerTest, DoxygenCommand4) {
"::", ""
};
std::vector<Token> Toks;
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
std::vector<Token> Toks;
lexString(Source, Toks);
lexString(Sources[i], Toks);
ASSERT_EQ(array_lengthof(Text), Toks.size());
ASSERT_EQ(array_lengthof(Text), Toks.size());
for (size_t i = 0, e = Toks.size(); i != e; i++) {
if(Toks[i].is(tok::text))
ASSERT_EQ(StringRef(Text[i]), Toks[i].getText())
<< "index " << i;
for (size_t j = 0, e = Toks.size(); j != e; j++) {
if(Toks[j].is(tok::text))
ASSERT_EQ(StringRef(Text[j]), Toks[j].getText())
<< "index " << i;
}
}
}
@ -404,6 +408,38 @@ TEST_F(CommentLexerTest, DoxygenCommand7) {
}
TEST_F(CommentLexerTest, DoxygenCommand8) {
const char *Source = "/// @em@em @em\t@em\n";
std::vector<Token> Toks;
lexString(Source, Toks);
ASSERT_EQ(8U, Toks.size());
ASSERT_EQ(tok::text, Toks[0].getKind());
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::at_command, Toks[1].getKind());
ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
ASSERT_EQ(tok::at_command, Toks[2].getKind());
ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
ASSERT_EQ(tok::text, Toks[3].getKind());
ASSERT_EQ(StringRef(" "), Toks[3].getText());
ASSERT_EQ(tok::at_command, Toks[4].getKind());
ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
ASSERT_EQ(tok::text, Toks[5].getKind());
ASSERT_EQ(StringRef("\t"), Toks[5].getText());
ASSERT_EQ(tok::at_command, Toks[6].getKind());
ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
ASSERT_EQ(tok::newline, Toks[7].getKind());
}
TEST_F(CommentLexerTest, DoxygenCommand9) {
const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
std::vector<Token> Toks;
@ -435,7 +471,7 @@ TEST_F(CommentLexerTest, DoxygenCommand8) {
ASSERT_EQ(tok::newline, Toks[7].getKind());
}
TEST_F(CommentLexerTest, DoxygenCommand9) {
TEST_F(CommentLexerTest, DoxygenCommand10) {
const char *Source = "// \\c\n";
std::vector<Token> Toks;
@ -453,7 +489,9 @@ TEST_F(CommentLexerTest, DoxygenCommand9) {
}
TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
const char *Source = "/// \\NewBlockCommand Aaa.\n";
const char *Source =
"/// \\NewBlockCommand Aaa.\n"
"/// @NewBlockCommand Aaa.\n";
Traits.registerBlockCommand(StringRef("NewBlockCommand"));
@ -461,10 +499,10 @@ TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
lexString(Source, Toks);
ASSERT_EQ(4U, Toks.size());
ASSERT_EQ(8U, Toks.size());
ASSERT_EQ(tok::text, Toks[0].getKind());
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::text, Toks[0].getKind());
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1]));
@ -472,7 +510,18 @@ TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
ASSERT_EQ(tok::text, Toks[2].getKind());
ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
ASSERT_EQ(tok::newline, Toks[3].getKind());
ASSERT_EQ(tok::newline, Toks[3].getKind());
ASSERT_EQ(tok::text, Toks[4].getKind());
ASSERT_EQ(StringRef(" "), Toks[4].getText());
ASSERT_EQ(tok::at_command, Toks[5].getKind());
ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5]));
ASSERT_EQ(tok::text, Toks[6].getKind());
ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText());
ASSERT_EQ(tok::newline, Toks[7].getKind());
}
TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {